# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
# Copyright 2019-2022 Mirantis, Inc.
import json
import os
import yaml
from copy import deepcopy
from multiprocessing.dummy import Pool
from time import sleep

from cfg_checker.clients import get_salt_remote, get_kube_remote
from cfg_checker.common.const import all_salt_roles_map, all_kube_roles_map
from cfg_checker.common.const import NODE_UP, NODE_DOWN, NODE_SKIP
from cfg_checker.common.const import ubuntu_versions, nova_openstack_versions
from cfg_checker.common import logger, logger_cli
from cfg_checker.common import utils
from cfg_checker.common.file_utils import create_temp_file_with_content
from cfg_checker.common.exception import SaltException, KubeException
from cfg_checker.common.ssh_utils import PortForward, SshShell
from cfg_checker.common.settings import pkg_dir, ENV_TYPE_KUBE, ENV_TYPE_SALT
from cfg_checker.helpers.console_utils import Progress


node_tmpl = {
    'role': '',
    'node_group': '',
    'status': NODE_DOWN,
    'pillars': {},
    'grains': {},
    'raw': {}
}

def _prepare_skipped_nodes(_names, skip_list, skip_list_file):
    _skipped_minions = []
    # skip list file
    if skip_list_file:
        _valid, _invalid = utils.get_nodes_list(skip_list_file)
        _skipped_minions.extend(_valid)
        if len(_invalid) > 0:
            logger_cli.info(
                "\n# WARNING: Detected invalid entries "
                "in nodes skip list:\n{}\n".format(
                    "\n".join(_invalid)
                )
            )

    # process wildcard, create node list out of mask
    if skip_list:
        _list = []
        _invalid = []
        for _item in skip_list:
            if '*' in _item:
                _str = _item[:_item.index('*')]
                _nodes = [_m for _m in _names if _m.startswith(_str)]
                if not _nodes:
                    logger_cli.warn(
                        "# WARNING: No nodes found for {}".format(_item)
                    )
                _list.extend(_nodes)
            else:
                if _item in _names:
                    _list.append(_item)
                else:
                    logger_cli.warn(
                        "# WARNING: No node found for {}".format(_item)
                    )
        # removing duplicates
        _list = list(set(_list))
        _skipped_minions.extend(_list)

    return _skipped_minions
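
# A minimal usage sketch (hypothetical node names): a mask entry such as
# "cmp*" is expanded by prefix match against the known names, while a plain
# entry must match a known name exactly.
#
#   _prepare_skipped_nodes(
#       ["cmp001.cluster.local", "cmp002.cluster.local",
#        "ctl01.cluster.local"],
#       ["cmp*"],
#       None
#   )
#   # -> ["cmp001.cluster.local", "cmp002.cluster.local"]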


class Nodes(object):
    def __init__(self, config):
        self.nodes = None
        self.env_config = config

    def skip_node(self, node):
        # Add node to skip list
        # For example, if it fails to comply with the rules

        # check if we know such node
        if node in self.nodes.keys() and node not in self.skip_list:
            # yes, add it
            self.skip_list.append(node)
            return True
        else:
            return False

    def get_nodes(self, skip_list=None, skip_list_file=None):
        if not self.nodes:
            if not skip_list and self.env_config.skip_nodes:
                self.gather_node_info(
                    self.env_config.skip_nodes,
                    skip_list_file
                )
            else:
                self.gather_node_info(skip_list, skip_list_file)
        return self.nodes

    def get_info(self):
        _info = {
            'mcp_release': self.mcp_release,
            'openstack_release': self.openstack_release,
            'k0rdent_release': self.kube.get_k0rdent_release(),
            'cluster_name': self.kube.get_cluster_name_from_kube_config()
        }
        return _info

    def is_node_available(self, node, log=True):
        if node in self.skip_list:
            if log:
                logger_cli.info("-> node '{}' not active".format(node))
            return False
        elif node in self.not_responded:
            if log:
                logger_cli.info("-> node '{}' not responded".format(node))
            return False
        else:
            return True


124class SaltNodes(Nodes):
125 def __init__(self, config):
126 super(SaltNodes, self).__init__(config)
Alexe0c5b9e2019-04-23 18:51:23 -0500127 logger_cli.info("# Gathering environment information")
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600128 # simple salt rest client
Alex9a4ad212020-10-01 18:04:25 -0500129 self.salt = None
130 self.env_type = ENV_TYPE_SALT
Alex3ebc5632019-04-18 16:47:18 -0500131
    def gather_node_info(self, skip_list, skip_list_file):
        # Keys for all nodes
        # this is not working in scope of 2016.8.3, will override with list
        logger_cli.debug("... collecting node names existing in the cloud")
        if not self.salt:
            self.salt = get_salt_remote(self.env_config)

        try:
            _keys = self.salt.list_keys()
            _str = []
            for _k, _v in _keys.items():
                _str.append("{}: {}".format(_k, len(_v)))
            logger_cli.info("-> keys collected: {}".format(", ".join(_str)))

            self.node_keys = {
                'minions': _keys['minions']
            }
        except Exception:
            _keys = None
            self.node_keys = None

        # List of minions with grains
        _minions = self.salt.list_minions()
        if _minions:
            logger_cli.info(
                "-> api reported {} active minions".format(len(_minions))
            )
        elif not self.node_keys:
            # this is the last resort
            _minions = self.env_config.load_nodes_list()
            logger_cli.info(
                "-> {} nodes loaded from list file".format(len(_minions))
            )
        else:
            _minions = self.node_keys['minions']

        # Skip nodes if needed
        _skipped_minions = \
            _prepare_skipped_nodes(_minions, skip_list, skip_list_file)

        # in case API did not list minions, we need all that answer ping
        _active = self.salt.get_active_nodes()
        logger_cli.info("-> nodes responded: {}".format(len(_active)))
        # iterate through all accepted nodes and create a dict for them
        self.nodes = {}
        self.skip_list = []
        _domains = set()
        for _name in _minions:
            _nc = utils.get_node_code(_name)
            _rmap = all_salt_roles_map
            _role = _rmap[_nc] if _nc in _rmap else 'unknown'
            if _name in _skipped_minions:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in _active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' is down, "
                        "added to skip list".format(
                            _name
                        )
                    )
            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name]['shortname'] = _name.split(".", 1)[0]
            _domains.add(_name.split(".", 1)[1])
            self.nodes[_name]['node_group'] = _nc
            self.nodes[_name]['role'] = _role
            self.nodes[_name]['status'] = _status
        _domains = list(_domains)
        if len(_domains) > 1:
            logger_cli.warning(
                "Multiple domains detected: {}".format(",".join(_domains))
            )
            # TODO: Use domain with biggest node count by default
            # or force it via config option
        else:
            self.domain = _domains[0]
        logger_cli.info("-> {} nodes inactive".format(len(self.skip_list)))
        logger_cli.info("-> {} nodes collected".format(len(self.nodes)))

        # form an all-nodes compound string to use in salt
        self.active_nodes_compound = self.salt.compound_string_from_list(
            filter(
                lambda nd: self.nodes[nd]['status'] == NODE_UP,
                self.nodes
            )
        )
        # get master node fqdn
        # _filtered = filter(
        #     lambda nd: self.nodes[nd]['role'] == const.all_roles_map['cfg'],
        #     self.nodes
        # )
        _role = all_salt_roles_map['cfg']
        _filtered = [n for n, v in self.nodes.items() if v['role'] == _role]
        if len(_filtered) < 1:
            raise SaltException(
                "No master node detected! Check/Update node role map."
            )
        else:
            self.salt.master_node = _filtered[0]

        # OpenStack versions
        self.mcp_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:apt_mk_version"
        )[self.salt.master_node]
        self.openstack_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:openstack_version"
        )[self.salt.master_node]
        # Preload codenames
        # do additional queries to get linux codename and arch for each node
        self.get_specific_pillar_for_nodes("_param:linux_system_codename")
        self.get_specific_pillar_for_nodes("_param:linux_system_architecture")
        for _name in self.nodes.keys():
            _n = self.nodes[_name]
            if _name not in self.skip_list:
                _p = _n['pillars']['_param']
                _n['linux_codename'] = _p['linux_system_codename']
                _n['linux_arch'] = _p['linux_system_architecture']

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs cmd.run and parses result into place
        or into dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        _result = self.execute_cmd_on_active_nodes(cmd, nodes=nodes)
        for node, data in _nodes.items():

            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

    def get_specific_pillar_for_nodes(self, pillar_path):
        """Function gets pillars on given path for all nodes

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting node pillars for '{}'".format(pillar_path)
        )
        _result = self.salt.pillar_get(self.active_nodes_compound, pillar_path)
        self.not_responded = []
        for node, data in self.nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        pillar_path
                    )
                )
                continue
            _pillar_keys = pillar_path.split(':')
            _data = data['pillars']
            # pre-create nested dict
            for idx in range(0, len(_pillar_keys)-1):
                _key = _pillar_keys[idx]
                if _key not in _data:
                    _data[_key] = {}
                _data = _data[_key]
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                _data[_pillar_keys[-1]] = None
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                _data[_pillar_keys[-1]] = None
                self.not_responded.append(node)
            else:
                _data[_pillar_keys[-1]] = _result[node]
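
    # A minimal sketch of the nested-dict pre-creation above (hypothetical
    # values): a pillar path of "_param:linux_system_codename" with result
    # "bionic" ends up stored as
    #
    #   node['pillars'] == {'_param': {'linux_system_codename': 'bionic'}}
    #
    # i.e. every ':'-separated segment except the last becomes a dict level.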

    def prepare_json_on_node(self, node, _dict, filename):
        if node in self.skip_list:
            logger_cli.debug(
                "... '{}' skipped while preparing json file of '{}'".format(
                    node,
                    filename
                )
            )
            # nothing to prepare for a skipped node
            return None

        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading data as '{}' "
            "to master's file cache folder: '{}'".format(
                filename,
                _storage_path
            )
        )
        _cache_path = os.path.join(_storage_path, filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _dumps)
        logger.debug("... syncing file to '{}'".format(node))
        self.salt.get_file(
            node,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        return _target_path

    def prepare_script_on_active_nodes(self, script_filename):
        # Prepare script
        _p = os.path.join(pkg_dir, 'scripts', script_filename)
        with open(_p, 'rt') as fd:
            _script = fd.read().splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading script {} "
            "to master's file cache folder: '{}'".format(
                script_filename,
                _storage_path
            )
        )
        self.salt.mkdir(self.salt.master_node, _storage_path)
        # Form cache, source and target path
        _cache_path = os.path.join(_storage_path, script_filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            script_filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _script)
        # command salt to copy file to minions
        _target_folder = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... creating script target folder '{}'".format(
                _target_folder
            )
        )
        self.salt.mkdir(
            self.active_nodes_compound,
            _target_folder,
            tgt_type="compound"
        )
        logger.debug("... syncing file to nodes")
        self.salt.get_file(
            self.active_nodes_compound,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        # return path on nodes, just in case
        return _target_path

    def execute_script_on_node(self, node, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            node,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all-False returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script")
        # handle results for each node
        _script_arguments = args if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            self.active_nodes_compound,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all-False returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        self.not_responded = []
        _r = self.salt.cmd(
            nodes if nodes else self.active_nodes_compound,
            'cmd.run',
            param=cmd,
            expr_form="compound"
        )

        # all-False returns mean that there was no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r


class KubeNodes(Nodes):
    def __init__(self, config):
        super(KubeNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple kube rest client
        self.kube = get_kube_remote(self.env_config)
        self.env_type = ENV_TYPE_KUBE
        self._namespace = "qa-space"
        self._configmap_name = self.env_config.kube_scripts_folder

        # prepare needed resources
        self.prepared_daemonsets = []
        # Check if we need resources prepared
        if not config.prepare_qa_resources:
            logger_cli.debug("... skipped preparing resources")
            self._scripts = None
            return
        else:
            self._check_namespace()
            self._scripts = self._check_config_map()

    def _check_namespace(self):
        # ensure namespace
        logger_cli.debug(
            "... checking namespace '{}'".format(self._namespace)
        )
        if not self.kube.ensure_namespace(self._namespace):
            raise KubeException(
                "Failed to manage namespace '{}'".format(self._namespace)
            )

    def _check_config_map(self):
        # ensure config map exists
        logger_cli.debug(
            "... checking config map '{}'".format(self._configmap_name)
        )
        _source = os.path.join(pkg_dir, 'scripts')
        return self.kube.create_config_map(
            self._namespace,
            self._configmap_name,
            _source
        )

    def get_k0rdent_release(self):
        logger_cli.debug("... get k0rdent release")
        self.k0rdent_release = self.kube.get_k0rdent_release()
        return self.k0rdent_release

    def gather_node_info(self, skip_list, skip_list_file):
        # Gather nodes info and query pod lists for each node
        logger_cli.debug("... collecting node names existing in the cloud")

        # Gather node names and info
        _nodes = self.kube.get_node_info()
        _node_names = list(_nodes.keys())
        # Skip nodes if needed
        _skipped_nodes = \
            _prepare_skipped_nodes(_node_names, skip_list, skip_list_file)

        # Count how many nodes active
        self._active = [n for n, v in _nodes.items()
                        if v['conditions']['ready']['status']]

        # iterate through all accepted nodes and create a dict for them
        self.nodes = {}
        self.skip_list = []
        for _name in _node_names:
            if _name in _skipped_nodes:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in self._active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' shows 'Ready' as 'False', "
                        "added to skip list".format(
                            _name
                        )
                    )
            _roles = {}
            _labels = {}
            for _label, _value in _nodes[_name]['labels'].items():
                if _label in all_kube_roles_map:
                    _roles[all_kube_roles_map[_label]] = _value
                else:
                    _labels[_label] = _value

            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name].pop("grains")
            self.nodes[_name].pop("pillars")

            # hostname
            self.nodes[_name]['shortname'] = \
                _nodes[_name]['addresses']['hostname']['address']
            # internal
            self.nodes[_name]['internalip'] = \
                _nodes[_name]['addresses']['internalip']['address']
            # alternate
            if self.env_config.force_node_network is not None:
                iIP = self.nodes[_name]['internalip']
                # use last number
                aIP = self.env_config.force_node_network + iIP.split('.')[-1]
                self.nodes[_name]["altip"] = aIP
            self.nodes[_name]['node_group'] = None
            self.nodes[_name]['labels'] = _labels
            self.nodes[_name]['roles'] = _roles
            self.nodes[_name]['status'] = _status
            # Backward compatibility
            _info = _nodes[_name]['status']['node_info']
            self.nodes[_name]['linux_image'] = _info['os_image']
            self.nodes[_name]['linux_arch'] = _info['architecture']

            _codename = "unknown"
            _info_str = _info['os_image']
            if _info_str.lower().startswith('ubuntu'):
                _n, _v, _ = _info_str.split(maxsplit=2)
                _v, _, _ = _v.rpartition('.') if '.' in _v else (_v, "", "")
                if _v in ubuntu_versions:
                    _codename = ubuntu_versions[_v].split()[0].lower()
            elif _info_str.lower().startswith('debian'):
                parts = _info_str.split()
                for part in parts:
                    if part.startswith("(") and part.endswith(")"):
                        _codename = part.strip("()").lower()
            self.nodes[_name]['linux_codename'] = _codename

            # Consider per-data type transfer
            self.nodes[_name]["raw"] = _nodes[_name]
        # TODO: Investigate how to handle domains in Kube, probably - skip
        # _domains = list(_domains)
        # if len(_domains) > 1:
        #     logger_cli.warning(
        #         "Multiple domains detected: {}".format(",".join(_domains))
        #     )
        # else:
        self.domain = "no.domain.in.kube.yet"
        logger_cli.info(
            "-> {} nodes collected: {} - active, {} - not active".format(
                len(self.nodes),
                len(self._active),
                len(self.skip_list)
            )
        )

        _role = "control-plane"
        _filtered = [
            n for n, v in self.nodes.items()
            if _role in str(v['labels'])
        ]
        if len(_filtered) < 1:
            raise KubeException(
                "No {} nodes detected! Check/Update node role map.".format(
                    _role
                )
            )
        else:
            _r = [n for n, v in self.nodes.items()
                  if v['status'] != NODE_UP and _role in v['roles']]
            if len(_r) > 0:
                logger_cli.warn(
                    "Master nodes are reporting 'NotReady':\n{}".format(
                        "\n".join(_r)
                    )
                )
            self.kube.master_node = _filtered[0]

        # get specific data upfront
        # OpenStack versions
        self.mcp_release = ""
        # Quick and dirty way to detect OS release
        try:
            _nova_version = self.kube.exec_on_target_pod(
                "nova-manage --version",
                "nova-api-osapi",
                "openstack"
            )
            _nmajor = _nova_version.partition('.')[0]
            self.openstack_release = nova_openstack_versions[_nmajor]
        except KubeException as e:
            logger_cli.warn("Openstack not detected: {}".format(e.message))
            self.openstack_release = nova_openstack_versions["00"]

        return

    @staticmethod
    def _get_ssh_shell(_h, _u, _k, _p, _q, _pipe, timeout=15):
        _ssh = SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe,
            timeout=timeout
        )
        return _ssh.connect()

    @staticmethod
    def _do_ssh_cmd(_cmd, _h, _u, _k, _p, _q, _pipe, timeout=None):
        with SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe
        ) as ssh:
            if timeout is None:
                _r = ssh.do(_cmd)
            else:
                _r = ssh.do(_cmd, timeout=timeout)
            logger_cli.debug("'{}'".format(_r))
            return _r

712
713 def node_shell(
714 self,
715 node,
716 silent=True,
717 piped=True,
718 use_sudo=True,
719 fport=None
720 ):
721 _u = self.env_config.kube_node_user
722 _k = self.env_config.kube_node_keypath
Alexe4de1142022-11-04 19:26:03 -0500723
724 _n = self.nodes[node]
725 _h = _n['altip'] if "altip" in _n else _n['internalip']
Alex9a4ad212020-10-01 18:04:25 -0500726 _p = 22
Alexeffa0682021-06-04 12:18:33 -0500727 if self.kube.is_local or self.kube.config.ssh_direct:
Alexf6ec91b2021-09-10 10:11:17 -0500728 logger.debug("Getting shell with no port forward")
729 return [None, self._get_ssh_shell(
Alex1f90e7b2021-09-03 15:31:28 -0500730 _h, _u, _k, _p, silent, piped,
731 timeout=self.kube.config.ssh_connect_timeout
Alexf6ec91b2021-09-10 10:11:17 -0500732 )]
Alex9a4ad212020-10-01 18:04:25 -0500733 else:
Alexf6ec91b2021-09-10 10:11:17 -0500734 logger.debug("Getting shell with with forward")
Alex9a4ad212020-10-01 18:04:25 -0500735 _fh = "localhost"
736 _p = 10022 if not fport else fport
737 _pfwd = PortForward(
738 self.env_config.ssh_host,
739 _h,
740 user=_u,
741 keypath=self.env_config.ssh_key,
Alex1f90e7b2021-09-03 15:31:28 -0500742 loc_port=_p,
743 timeout=self.kube.config.ssh_connect_timeout
Alex9a4ad212020-10-01 18:04:25 -0500744 )
745 _pfwd.connect()
Alex1f90e7b2021-09-03 15:31:28 -0500746 _ssh = self._get_ssh_shell(
747 _fh,
748 _u,
749 _k,
750 _p,
751 silent,
752 piped,
753 timeout=self.kube.config.ssh_connect_timeout
754 )
Alexf6ec91b2021-09-10 10:11:17 -0500755 return [_pfwd, _ssh]
Alex9a4ad212020-10-01 18:04:25 -0500756
757 def execute_script_on_node(self, node, script_filename, args=[]):
758 # Prepare path
759 _target_path = os.path.join(
Alexccb72e02021-01-20 16:38:03 -0600760 self.env_config.kube_node_homepath,
Alex9a4ad212020-10-01 18:04:25 -0500761 self.env_config.kube_scripts_folder,
762 script_filename
763 )
764
765 # execute script
766 logger_cli.debug("... running script on '{}'".format(node))
767 # handle results for each node
768 _script_arguments = " ".join(args) if args else ""
769 self.not_responded = []
770 # get result
771 _nr = self.node_shell(
772 node,
773 "python {} {}".format(
774 _target_path,
775 _script_arguments
776 )
777 )
778
779 if not _nr:
780 self.not_responded.append(node)
781 return {}
782 else:
783 return {node: _nr}

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        logger_cli.debug("...running '{}' on active nodes".format(cmd))
        # handle results for each node
        self.not_responded = []
        _r = {}
        # TODO: Use threading and pool
        for node in self._active:
            # node_shell only opens the connection; run the command
            # via the returned shell and close it afterwards
            _fwd_sh, _sh = self.node_shell(node)
            _nr = _sh.do(cmd)
            if _fwd_sh:
                _fwd_sh.kill()
            _sh.kill()

            if not _nr:
                self.not_responded.append(node)
            else:
                _r[node] = _nr

        return _r

    def _ssh_exec_script(self, params):
        """
        Threadsafe method to get shell to node,
        check/copy script and get results
        [
            node_name,
            src_path,
            tgt_path,
            conf,
            args
        ]
        """
        _timeout = self.kube.config.script_execution_timeout
        _name = params[0]
        _src = params[1]
        _tgt = params[2]
        _conf = params[3]
        _args = params[4]
        _port = params[5]
        _log_name = "["+_name+"]:"
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            _name,
            use_sudo=False,
            fport=_port
        )
        # check python3
        _python = _sh.do("which python3")
        _python = utils.to_bool(
            _sh.do(_check.format(_python))
        )
        if not _python:
            _sh.do("apt install python3", sudo=True, timeout=_timeout)
        # check if script already there
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            _conf.kube_scripts_folder
        )
        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger.info("{} Syncing file".format(_log_name))
        _code, _r, _e = _sh.scp(
            _src,
            _sh.get_host_path(_tgt),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "{} Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_log_name, _r, _e)
            )

        # execute script
        logger.debug("{} Running script".format(_log_name))
        _out = _sh.do(
            "python3 {}{}".format(
                _tgt,
                _args
            ),
            sudo=True,
            timeout=_timeout
        )

        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()

        return [_name, _out]
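
    # A minimal sketch of one parameter block consumed above (hypothetical
    # values; the port is a per-node local forward port, see the caller):
    #
    #   [
    #       "cmp001",                       # node_name
    #       "/opt/pkg/scripts/check.py",    # src_path
    #       "/home/user/scripts/check.py",  # tgt_path
    #       self.env_config,                # conf
    #       " --arg1",                      # args
    #       10022                           # port
    #   ]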

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare script
        _source_path = os.path.join(pkg_dir, 'scripts', script_filename)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        if _script_arguments:
            _script_arguments = " " + _script_arguments
        self.not_responded = []
        _results = {}
        logger_cli.debug(
            "... running '{}' on active nodes, {} worker threads".format(
                script_filename,
                self.env_config.threads
            )
        )
        # Workers pool
        pool = Pool(self.env_config.threads)

        # init the parameters
        # node_name,
        # src_path,
        # tgt_path,
        # conf,
        # args
        _params = []
        _port = 10022
        for node in self._active:
            # build parameter blocks
            _p_list = [
                node,
                _source_path,
                _target_path,
                self.env_config,
                _script_arguments,
                _port
            ]
            _params.append(_p_list)
            _port += 1

        _progress = Progress(len(_params))
        results = pool.imap_unordered(self._ssh_exec_script, _params)

        for ii in enumerate(results, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results[ii[1][0]] = ii[1][1]
            _progress.write_progress(ii[0])

        _progress.end()
        pool.close()
        pool.join()

        # return collected results per node
        return _results

    def prepare_json_on_node(self, node, _dict, filename):
        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()

        _source_path = create_temp_file_with_content(_dumps)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            filename
        )
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder
        )
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            node,
            use_sudo=False
        )

        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger_cli.debug(
            "... create data on node '{}':'{}'".format(node, _target_path)
        )
        _code, _r, _e = _sh.scp(
            _source_path,
            _sh.get_host_path(_target_path),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_r, _e)
            )

        # the forward is None for direct connections
        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()
        return _target_path

    def prepare_daemonset(self, template_filename):
        # load template
        _yaml_file = os.path.join(pkg_dir, 'templates', template_filename)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _ds = {}
        with open(_yaml_file) as dsFile:
            _ds = yaml.load(dsFile, Loader=yaml.SafeLoader)

        # Add scripts to pod template as volumeMounts
        _tspec = _ds['spec']['template']['spec']
        _tspec['containers'][0]['volumeMounts'] = [
            {
                "name": "scripts",
                "mountPath": os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder
                )
            }
        ]

        _tspec['volumes'] = [
            {
                "name": "scripts",
                "configMap": {
                    "name": self._configmap_name
                }
            }
        ]

        # create daemonset
        logger_cli.debug("... preparing daemonset")
        _ds = self.kube.prepare_daemonset_from_yaml(self._namespace, _ds)
        # Save prepared daemonset
        self.prepared_daemonsets.append(_ds)
        # return it
        return _ds
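
    # A sketch of the pod spec fragment produced above, assuming
    # kube_scripts_folder == "cfg-checker-scripts": the scripts configmap
    # is mounted into every daemonset pod at /cfg-checker-scripts.
    #
    #   spec:
    #     containers:
    #       - volumeMounts:
    #           - name: scripts
    #             mountPath: /cfg-checker-scripts
    #     volumes:
    #       - name: scripts
    #         configMap:
    #           name: cfg-checker-scripts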

    def wait_for_daemonset(self, ds, timeout=120):
        # iteration timeout
        _sleep_time = 5
        _timeout = timeout

        # query daemonset and check that desired=scheduled=ready
        _ds = self.kube.get_daemon_set_by_name(
            ds.metadata.namespace,
            ds.metadata.name
        )

        # skipped nodes are already excluded here
        _total = len(self.nodes) - len(self.skip_list)
        # _scheduled = _ds.status.scheduled
        # _ready = _ds.status.ready

        # Init Progress bar to show daemonset readiness
        _progress = Progress(_total)
        while _timeout > 0:
            # get new status
            _ds = self.kube.get_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
            _desired = _ds.status.desired_number_scheduled
            _scheduled = _ds.status.current_number_scheduled
            _ready = _ds.status.number_ready
            _updated = _ds.status.updated_number_scheduled
            # print it
            _progress.write_progress(
                _ready,
                note="desired: {}, scheduled: {}, ready: {},"
                     " up-to-date: {}".format(
                         _desired,
                         _scheduled,
                         _ready,
                         _updated
                     )
            )

            # check values and return
            # In case of Update, also checking _updated value
            if _ready == _updated and _ready == _total:
                # close progress bar class
                _progress.end()
                logger_cli.debug("... daemonset is ready")
                return True
            # iterate
            _timeout -= _sleep_time
            # wait
            sleep(_sleep_time)

        # timed out
        _progress.end()
        # log it
        logger_cli.error("Timed out waiting for Daemonset to be ready")
        return False

    def exec_script_on_target_pod(self, pod_name, script_filename, args=None):
        """
        Run script from configmap on target pod assuming it is present
        """
        # args must be a list here, as it is concatenated to the cmd list
        _arguments = args if args else []
        _cmd = [
            "python3",
            os.path.join(
                "/",
                self.env_config.kube_scripts_folder,
                script_filename
            )
        ] + _arguments
        _result = self.kube.exec_on_target_pod(
            _cmd,
            pod_name,
            self._namespace,
            strict=True
        )
        return _result

    def exec_cmd_on_target_pod(self, pod_name, ns, command_str):
        """
        Run cmd on target pod
        """
        _result = self.kube.exec_on_target_pod(
            command_str,
            pod_name,
            ns,
            strict=True
        )
        return _result

    def execute_cmd_on_daemon_set(
        self,
        ds,
        cmd,
        _args=None,
        is_script=False
    ):
        """
        Query daemonset for pods and execute script on all of them
        """
        _results = self.exec_cmd_on_pods(
            self.kube.get_pods_for_daemonset(ds),
            cmd,
            _args=_args,
            is_script=is_script
        )
        # Update results
        _ds_results = {}
        # only node name and result are needed;
        # pod name and cmd are ignored
        for _n, _, _v, _ in _results:
            _ds_results[_n] = _v
        return _ds_results

    def exec_on_labeled_pods_and_ns(
        self,
        label_str,
        cmd,
        _args=None,
        ns=None,
        silent=False
    ):
        if not ns:
            ns = self._namespace
        _results = self.exec_cmd_on_pods(
            self.kube.list_pods(ns, label_str=label_str),
            cmd,
            _args=_args,
            silent=silent
        )
        _pod_results = {}
        # results come as [node, pod, output, cmd]; key them by pod
        for _, _p, _v, _ in _results:
            _pod_results[_p] = _v
        return _pod_results

    def _pooled_exec_on_pod(self, plist, silent=False):
        def _kube_exec_on_pod(plist):
            return [
                plist[1],  # node
                plist[3],  # pod name
                plist[0].kube.exec_on_target_pod(  # pointer to function
                    plist[4],  # cmd
                    plist[3],  # pod name
                    plist[2],  # namespace
                    strict=True,
                    _request_timeout=120,
                    arguments=plist[5]
                ),
                # save cmd used
                plist[4]
            ]
        # map func and cmd
        pool = Pool(self.env_config.threads)
        _results = []
        self.not_responded = []
        # create result list
        if not silent:
            _progress = Progress(len(plist))
        ret = pool.imap_unordered(_kube_exec_on_pod, plist)

        for ii in enumerate(ret, start=1):
            # the command output sits at index 2 of the result block
            if not ii[1][2]:
                self.not_responded.append(ii[1][0])
            else:
                _results.append(ii[1])
            if not silent:
                _progress.write_progress(ii[0])

        if not silent:
            _progress.end()
        pool.close()
        pool.join()
        logger_cli.debug(
            "... done, {} total outputs; {} not responded".format(
                len(_results),
                len(self.not_responded)
            )
        )
        return _results
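
    # A sketch of the data flow above (hypothetical names): each parameter
    # block is [self, node, namespace, pod, cmd, args] and each result block
    # is [node, pod, output, cmd], e.g.
    #
    #   in:  [self, "cmp001", "qa-space", "ds-pod-abcde", "uname", "-a"]
    #   out: ["cmp001", "ds-pod-abcde", "Linux ...", "uname"]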

    def exec_cmd_on_pods(
        self,
        pod_list,
        cmd,
        _args=None,
        is_script=False,
        silent=False
    ):

        # Create map for threads: [[node_name, ns, pod_name, cmd]...]
        logger_cli.debug(
            "... running script on {} pods using {} threads at a time".format(
                len(pod_list.items),
                self.env_config.threads
            )
        )
        _plist = []
        _arguments = _args if _args else ""
        if is_script:
            _cmd = [
                "python3",
                os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder,
                    cmd
                ),
                _arguments
            ]
            _cmd = " ".join(_cmd)
        else:
            # decide if we are to wrap it to bash
            if '|' in cmd:
                _cmd = "bash -c"
                _arguments = cmd
            else:
                _cmd = cmd
        for item in pod_list.items:
            _plist.append(
                [
                    self,
                    item.spec.node_name,
                    item.metadata.namespace,
                    item.metadata.name,
                    _cmd,
                    _arguments
                ]
            )

        return self._pooled_exec_on_pod(_plist, silent=silent)
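
    # A sketch of the bash-wrapping rule above (hypothetical command): a
    # pipeline cannot be exec'ed in the pod directly, so it is handed to
    # bash as the argument instead.
    #
    #   "uptime"            -> cmd "uptime",  args ""
    #   "ps aux | grep ssh" -> cmd "bash -c", args "ps aux | grep ssh"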

    def exec_cmds_on_pod(self, pod, cmd_list):
        logger_cli.debug(
            "... running {} cmds using {} threads at a time".format(
                len(cmd_list),
                self.env_config.threads
            )
        )
        _plist = []
        # decide if we are to wrap it to bash
        for item in cmd_list:
            if '|' in item:
                _cmd = "bash -c"
                _arguments = item
            else:
                _cmd = item
                _arguments = ""
            _plist.append(
                [
                    self,
                    pod.spec.node_name,
                    pod.metadata.namespace,
                    pod.metadata.name,
                    _cmd,
                    _arguments
                ]
            )

        return self._pooled_exec_on_pod(_plist)

    def delete_daemonset(self, ds):
        # Try to delete daemonset
        try:
            _r = self.kube.delete_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
        except Exception as e:
            logger_cli.warning("Failed to delete daemonset '{}': {}".format(
                ds.metadata.name,
                e.reason
            ))
            _r = None
        return _r

    def get_pod_name_in_daemonset_by_node(self, nodename, daemonset):
        _podname = None
        _pods = self.kube.get_pods_for_daemonset(daemonset)
        for item in _pods.items:
            if item.spec.node_name == nodename:
                _podname = item.metadata.name

        return _podname

    def prepare_json_in_pod(self, podname, namespace, targets, filename):
        # Iterate pods in daemonset and prepare json file on each one
        _target_path = os.path.join(
            "/",
            "tmp",
            filename
        )
        # checking the folder is probably not needed as the daemonset
        # links the configmap there on creation
        # _folder = os.path.join(
        #     self.env_config.kube_node_homepath,
        #     self.env_config.kube_scripts_folder
        # )
        # prepare data
        buffer = json.dumps(targets, indent=2).encode('utf-8')

        # write data to pod using fancy websocket function
        self.kube.put_string_buffer_to_pod_as_textfile(
            podname,
            namespace,
            buffer,
            _target_path
        )

        # TODO: Exception handling

        return _target_path

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs command on daemonset and parses result into place
        or into dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        # Dirty way to get daemonset that was used in checker and not deleted
        _ds = self.prepared_daemonsets[0]
        _result = self.execute_cmd_on_daemon_set(_ds, cmd)
        for node, data in _nodes.items():

            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

    def prepare_benchmark_agent(self, index, path, sc, size, template):
        # Load pod template
        _yaml_file = os.path.join(pkg_dir, 'templates', template)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _pod = {}
        with open(_yaml_file) as podFile:
            _pod = yaml.load(podFile, Loader=yaml.SafeLoader)

        # set namings
        _n = "cfgagent-{:02}".format(index)
        _pvc_n = "cfgagent-pvc-{:02}".format(index)
        # _pv_n = "cfgagent-pv-{:02}".format(index)

        _pod["metadata"]["name"] = _n
        _pod["metadata"]["labels"]["name"] = _n
        # replace volumeMounts
        for _c in _pod["spec"]["containers"]:
            for _mnt in _c["volumeMounts"]:
                if "placeholder" in _mnt["name"]:
                    # _mnt["name"] = _pv_n
                    _mnt["mountPath"] = path
        # replace claim
        for _v in _pod["spec"]["volumes"]:
            if "cfgagent-pv" in _v["name"]:
                # _v["name"] = _pv_n
                _v["persistentVolumeClaim"]["claimName"] = _pvc_n

        # init volume resources
        # _pv_object = self.kube.init_pv_resource(_pv_n, sc, size, path)
        # _pv = self.kube.prepare_pv(_pv_object)
        # update size of the volume to be 15% larger
        _pvc_object = self.kube.init_pvc_resource(_pvc_n, sc, size)
        _pvc = self.kube.prepare_pvc(_pvc_object)

        # start pod
        _pod = self.kube.prepare_pod_from_yaml(_pod)

        # return _pod, _pv, _pvc
        return _pod, _pvc

    def expose_benchmark_agent(self, agent):
        return self.kube.expose_pod_port(agent, 8765)
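
    # A minimal usage sketch (hypothetical storage class, size and template
    # name): agent index 1 yields pod "cfgagent-01" bound to claim
    # "cfgagent-pvc-01", exposed on its websocket port.
    #
    #   _pod, _pvc = self.prepare_benchmark_agent(
    #       1, "/cfgagent", "standard", "10Gi", "cfgagent-template.yaml"
    #   )
    #   _svc = self.expose_benchmark_agent(_pod)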

    def cleanup_resource_by_name(self, res_type, name, ns=None, wait=False):
        """Cleans up resource using string res_type and the ns/name

        Args:
            res_type (string): resource type name: pod, pv, pvc, svc
            name (string): resource name to cleanup
            ns (string, optional): Namespace to use. Default is 'qa-space'

        return: (bool) Is Success?
        """
        # fill defaults
        if not ns:
            ns = self._namespace
        # Handle res_type errors and choose resource type
        if not res_type:
            logger_cli.debug(
                "... resource type invalid: '{}'".format(res_type)
            )
            return False
        elif not name:
            logger_cli.debug("... resource name invalid: '{}'".format(name))
            return False
        elif res_type == "svc":
            # Delete service
            logger_cli.info("-> deleting svc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_service(name, ns)
            # TODO: Check if successful
        elif res_type == "pod":
            # Delete a pod
            logger_cli.info("-> deleting pod {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_pod(name, ns)
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pvc":
            logger_cli.info("-> deleting pvc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_persistent_volume_claim(
                name,
                ns
            )
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pv":
            logger_cli.info("-> deleting pv {}/{}".format(ns, name))
            self.kube.CoreV1.delete_persistent_volume(name)
            if wait:
                self.kube.wait_for_phase(res_type, name, None, ["Terminated"])

        return True
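
    # A minimal usage sketch (hypothetical names): delete a benchmark agent
    # pod and its claim, blocking until the pod is gone.
    #
    #   self.cleanup_resource_by_name("pod", "cfgagent-01", wait=True)
    #   self.cleanup_resource_by_name("pvc", "cfgagent-pvc-01")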

    def get_resource_phase_by_name(self, typ, name, ns="qa-space"):
        if typ == "pod":
            _t = self.kube.get_pod_by_name_and_ns(name, ns)
        elif typ == "svc":
            _t = self.kube.get_svc_by_name_and_ns(name, ns)
        elif typ == "pvc":
            _t = self.kube.get_pvc_by_name_and_ns(name, ns)
        elif typ == "pv":
            _t = self.kube.get_pv_by_name(name)
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None

        if _t:
            return _t.status.phase
        else:
            return None

    def list_resource_names_by_type_and_ns(self, typ, ns="qa-space"):
        if typ == "pod":
            _items = self.kube.list_pods(ns)
        elif typ == "svc":
            _items = self.kube.list_svc(ns)
        elif typ == "pvc":
            _items = self.kube.list_pvc(ns)
        elif typ == "pv":
            _items = self.kube.list_pv()
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None
        return [[i.metadata.namespace, i.metadata.name] for i in _items.items]

    def list_pod_names_with_containers(self, ns="qa-space", running_only=True):
        _result = []
        _pods = self.kube.list_pods(ns)
        if not running_only:
            for i in _pods.items:
                _result.append([
                    i.metadata.namespace,
                    i.metadata.name,
                    [c.name for c in i.spec.containers]
                ])
        else:
            for i in _pods.items:
                if i.status.phase == "Running":
                    _result.append([
                        i.metadata.namespace,
                        i.metadata.name,
                        [c.name for c in i.status.container_statuses
                         if c.state.running is not None]
                    ])
        return _result

    def get_logs_for_pod(self, podname, container, namespace, tail_lines):
        try:
            return self.kube.get_pod_logs(
                podname,
                container,
                namespace,
                tail_lines=tail_lines
            )
        except KubeException as e:
            logger_cli.warning(
                "WARNING: Log retrieval failed: '{}'".format(e.message)
            )
            return ""

    def list_namespaces(self):
        return [i.metadata.name for i in self.kube.list_namespaces().items]