# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
# Copyright 2019-2022 Mirantis, Inc.
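"""Node discovery and remote execution helpers for cfg-checker.

Implements a common `Nodes` base plus Salt- and Kubernetes-backed
implementations that gather node info, sync scripts and files to nodes
and execute commands on them, collecting per-node results.
"""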
import json
import os
import yaml
from copy import deepcopy
from multiprocessing.dummy import Pool
from time import sleep

from cfg_checker.clients import get_salt_remote, get_kube_remote
from cfg_checker.common.const import all_salt_roles_map, all_kube_roles_map
from cfg_checker.common.const import NODE_UP, NODE_DOWN, NODE_SKIP
from cfg_checker.common.const import ubuntu_versions, nova_openstack_versions
from cfg_checker.common import logger, logger_cli
from cfg_checker.common import utils
from cfg_checker.common.file_utils import create_temp_file_with_content
from cfg_checker.common.exception import SaltException, KubeException
from cfg_checker.common.ssh_utils import PortForward, SshShell
from cfg_checker.common.settings import pkg_dir, ENV_TYPE_KUBE, ENV_TYPE_SALT
from cfg_checker.helpers.console_utils import Progress

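# Template for a single node entry; deepcopy()'d for every discovered node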
node_tmpl = {
    'role': '',
    'node_group': '',
    'status': NODE_DOWN,
    'pillars': {},
    'grains': {},
    'raw': {}
}


def _prepare_skipped_nodes(_names, skip_list, skip_list_file):
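    """Build the list of node names to skip.

    Combines explicit names and wildcard masks from `skip_list` with the
    names loaded from `skip_list_file`, validating entries against the
    known node names in `_names`.
    """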
    _skipped_minions = []
    # skip list file
    if skip_list_file:
        _valid, _invalid = utils.get_nodes_list(skip_list_file)
        _skipped_minions.extend(_valid)
        if len(_invalid) > 0:
            logger_cli.info(
                "\n# WARNING: Detected invalid entries "
                "in nodes skip list:\n{}\n".format(
                    "\n".join(_invalid)
                )
            )

    # process wildcard, create node list out of mask
    if skip_list:
        _list = []
        _invalid = []
        for _item in skip_list:
            if '*' in _item:
                _str = _item[:_item.index('*')]
                _nodes = [_m for _m in _names if _m.startswith(_str)]
                if not _nodes:
                    logger_cli.warn(
                        "# WARNING: No nodes found for {}".format(_item)
                    )
                _list.extend(_nodes)
            else:
                if _item in _names:
                    _list.append(_item)
                else:
                    logger_cli.warn(
                        "# WARNING: No node found for {}".format(_item)
                    )
        # removing duplicates
        _list = list(set(_list))
        _skipped_minions.extend(_list)

    return _skipped_minions


class Nodes(object):
    def __init__(self, config):
        self.nodes = None
        self.env_config = config

    def skip_node(self, node):
        # Add node to skip list,
        # for example, if it fails to comply with the rules

        # check if we know such node
        if node in self.nodes.keys() and node not in self.skip_list:
            # yes, add it
            self.skip_list.append(node)
            return True
        else:
            return False

    def get_nodes(self, skip_list=None, skip_list_file=None):
        if not self.nodes:
            if not skip_list and self.env_config.skip_nodes:
                self.gather_node_info(
                    self.env_config.skip_nodes,
                    skip_list_file
                )
            else:
                self.gather_node_info(skip_list, skip_list_file)
        return self.nodes

    def get_info(self):
        _info = {
            'mcp_release': self.mcp_release,
            'openstack_release': self.openstack_release
        }
        return _info

    def is_node_available(self, node, log=True):
        if node in self.skip_list:
            if log:
                logger_cli.info("-> node '{}' not active".format(node))
            return False
        elif node in self.not_responded:
            if log:
                logger_cli.info("-> node '{}' not responded".format(node))
            return False
        else:
            return True


class SaltNodes(Nodes):
    def __init__(self, config):
        super(SaltNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple salt rest client
        self.salt = None
        self.env_type = ENV_TYPE_SALT

    def gather_node_info(self, skip_list, skip_list_file):
        # Keys for all nodes
        # this is not working in scope of 2016.8.3, will override with list
        logger_cli.debug("... collecting node names existing in the cloud")
        if not self.salt:
            self.salt = get_salt_remote(self.env_config)

        try:
            _keys = self.salt.list_keys()
            _str = []
            for _k, _v in _keys.items():
                _str.append("{}: {}".format(_k, len(_v)))
            logger_cli.info("-> keys collected: {}".format(", ".join(_str)))

            self.node_keys = {
                'minions': _keys['minions']
            }
        except Exception:
            _keys = None
            self.node_keys = None

        # List of minions with grains
        _minions = self.salt.list_minions()
        if _minions:
            logger_cli.info(
                "-> api reported {} active minions".format(len(_minions))
            )
        elif not self.node_keys:
            # this is the last resort
            _minions = self.env_config.load_nodes_list()
            logger_cli.info(
                "-> {} nodes loaded from list file".format(len(_minions))
            )
        else:
            _minions = self.node_keys['minions']

        # Skip nodes if needed
        _skipped_minions = \
            _prepare_skipped_nodes(_minions, skip_list, skip_list_file)

        # in case API not listed minions, we need all that answer ping
        _active = self.salt.get_active_nodes()
        logger_cli.info("-> nodes responded: {}".format(len(_active)))
        # iterate through all accepted nodes and create a dict for it
        self.nodes = {}
        self.skip_list = []
        _domains = set()
        for _name in _minions:
            _nc = utils.get_node_code(_name)
            _rmap = all_salt_roles_map
            _role = _rmap[_nc] if _nc in _rmap else 'unknown'
            if _name in _skipped_minions:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in _active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' is down, "
                        "added to skip list".format(
                            _name
                        )
                    )
            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name]['shortname'] = _name.split(".", 1)[0]
            _domains.add(_name.split(".", 1)[1])
            self.nodes[_name]['node_group'] = _nc
            self.nodes[_name]['role'] = _role
            self.nodes[_name]['status'] = _status
        _domains = list(_domains)
        if len(_domains) > 1:
            logger_cli.warning(
                "Multiple domains detected: {}".format(",".join(_domains))
            )
        # TODO: Use domain with biggest node count by default
        # or force it via config option
        else:
            self.domain = _domains[0]
        logger_cli.info("-> {} nodes inactive".format(len(self.skip_list)))
        logger_cli.info("-> {} nodes collected".format(len(self.nodes)))

        # form an all nodes compound string to use in salt
        self.active_nodes_compound = self.salt.compound_string_from_list(
            filter(
                lambda nd: self.nodes[nd]['status'] == NODE_UP,
                self.nodes
            )
        )
        # get master node fqdn
        # _filtered = filter(
        #     lambda nd: self.nodes[nd]['role'] == const.all_roles_map['cfg'],
        #     self.nodes
        # )
        _role = all_salt_roles_map['cfg']
        _filtered = [n for n, v in self.nodes.items() if v['role'] == _role]
        if len(_filtered) < 1:
            raise SaltException(
                "No master node detected! Check/Update node role map."
            )
        else:
            self.salt.master_node = _filtered[0]

        # OpenStack versions
        self.mcp_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:apt_mk_version"
        )[self.salt.master_node]
        self.openstack_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:openstack_version"
        )[self.salt.master_node]
        # Preload codenames
        # do additional queries to get linux codename and arch for each node
        self.get_specific_pillar_for_nodes("_param:linux_system_codename")
        self.get_specific_pillar_for_nodes("_param:linux_system_architecture")
        for _name in self.nodes.keys():
            _n = self.nodes[_name]
            if _name not in self.skip_list:
                _p = _n['pillars']['_param']
                _n['linux_codename'] = _p['linux_system_codename']
                _n['linux_arch'] = _p['linux_system_architecture']

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs cmd.run and parses results into place
        or into dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        _result = self.execute_cmd_on_active_nodes(cmd, nodes=nodes)
        for node, data in _nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

    def get_specific_pillar_for_nodes(self, pillar_path):
        """Function gets pillars on given path for all nodes

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting node pillars for '{}'".format(pillar_path)
        )
        _result = self.salt.pillar_get(self.active_nodes_compound, pillar_path)
        self.not_responded = []
        for node, data in self.nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        pillar_path
                    )
                )
                continue
            _pillar_keys = pillar_path.split(':')
            _data = data['pillars']
            # pre-create nested dict
            for idx in range(0, len(_pillar_keys)-1):
                _key = _pillar_keys[idx]
                if _key not in _data:
                    _data[_key] = {}
                _data = _data[_key]
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                _data[_pillar_keys[-1]] = None
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                _data[_pillar_keys[-1]] = None
                self.not_responded.append(node)
            else:
                _data[_pillar_keys[-1]] = _result[node]

    def prepare_json_on_node(self, node, _dict, filename):
        if node in self.skip_list:
            logger_cli.debug(
                "... '{}' skipped while preparing json file of '{}'".format(
                    node,
                    filename
                )
            )
            # nothing to prepare for a skipped node
            return None

        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading data as '{}' "
            "to master's file cache folder: '{}'".format(
                filename,
                _storage_path
            )
        )
        _cache_path = os.path.join(_storage_path, filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _dumps)
        logger.debug("... syncing file to '{}'".format(node))
        self.salt.get_file(
            node,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        return _target_path

    def prepare_script_on_active_nodes(self, script_filename):
        # Prepare script
        _p = os.path.join(pkg_dir, 'scripts', script_filename)
        with open(_p, 'rt') as fd:
            _script = fd.read().splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading script {} "
            "to master's file cache folder: '{}'".format(
                script_filename,
                _storage_path
            )
        )
        self.salt.mkdir(self.salt.master_node, _storage_path)
        # Form cache, source and target path
        _cache_path = os.path.join(_storage_path, script_filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            script_filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _script)
        # command salt to copy file to minions
        logger_cli.debug(
            "... creating script target folder '{}'".format(
                _cache_path
            )
        )
        self.salt.mkdir(
            self.active_nodes_compound,
            os.path.join(
                '/root',
                self.env_config.salt_scripts_folder
            ),
            tgt_type="compound"
        )
        logger.debug("... syncing file to nodes")
        self.salt.get_file(
            self.active_nodes_compound,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        # return path on nodes, just in case
        return _target_path

    def execute_script_on_node(self, node, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            node,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all false returns means that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script")
        # handle results for each node
        _script_arguments = args if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            self.active_nodes_compound,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all false returns means that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        self.not_responded = []
        _r = self.salt.cmd(
            nodes if nodes else self.active_nodes_compound,
            'cmd.run',
            param=cmd,
            expr_form="compound"
        )

        # all false returns means that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r


class KubeNodes(Nodes):
    def __init__(self, config):
        super(KubeNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple kube api client
        self.kube = get_kube_remote(self.env_config)
        self.env_type = ENV_TYPE_KUBE
        self._namespace = "qa-space"
        self._configmap_name = self.env_config.kube_scripts_folder

        # prepare needed resources
        self.prepared_daemonsets = []
        # Check if we need resources prepared
        if not config.prepare_qa_resources:
            logger_cli.debug("... skipped preparing resources")
            self._scripts = None
            return
        else:
            self._check_namespace()
            self._scripts = self._check_config_map()

    def _check_namespace(self):
        # ensure namespace
        logger_cli.debug(
            "... checking namespace '{}'".format(self._namespace)
        )
        if not self.kube.ensure_namespace(self._namespace):
            raise KubeException(
                "Failed to manage namespace '{}'".format(self._namespace)
            )

    def _check_config_map(self):
        # ensure config map exists
        logger_cli.debug(
            "... checking config map '{}'".format(self._configmap_name)
        )
        _source = os.path.join(pkg_dir, 'scripts')
        return self.kube.create_config_map(
            self._namespace,
            self._configmap_name,
            _source
        )

    def gather_node_info(self, skip_list, skip_list_file):
        # Gather nodes info and query pod lists for each node
        logger_cli.debug("... collecting node names existing in the cloud")

        # Gather node names and info
        _nodes = self.kube.get_node_info()
        _node_names = list(_nodes.keys())
        # Skip nodes if needed
        _skipped_nodes = \
            _prepare_skipped_nodes(_node_names, skip_list, skip_list_file)

        # Count how many nodes active
        self._active = [n for n, v in _nodes.items()
                        if v['conditions']['ready']['status']]

        # iterate through all accepted nodes and create a dict for it
        self.nodes = {}
        self.skip_list = []
        for _name in _node_names:
            if _name in _skipped_nodes:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in self._active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' shows 'Ready' as 'False', "
                        "added to skip list".format(
                            _name
                        )
                    )
            _roles = {}
            _labels = {}
            for _label, _value in _nodes[_name]['labels'].items():
                if _label in all_kube_roles_map:
                    _roles[all_kube_roles_map[_label]] = _value
                else:
                    _labels[_label] = _value

            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name].pop("grains")
            self.nodes[_name].pop("pillars")

            # hostname
            self.nodes[_name]['shortname'] = \
                _nodes[_name]['addresses']['hostname']['address']
            # internal
            self.nodes[_name]['internalip'] = \
                _nodes[_name]['addresses']['internalip']['address']
            # alternate
            if self.env_config.force_node_network is not None:
                iIP = self.nodes[_name]['internalip']
                # use last number
                aIP = self.env_config.force_node_network + iIP.split('.')[-1]
                self.nodes[_name]["altip"] = aIP
            self.nodes[_name]['node_group'] = None
            self.nodes[_name]['labels'] = _labels
            self.nodes[_name]['roles'] = _roles
            self.nodes[_name]['status'] = _status
            # Backward compatibility
            _info = _nodes[_name]['status']['node_info']
            self.nodes[_name]['linux_image'] = _info['os_image']
            self.nodes[_name]['linux_arch'] = _info['architecture']

            _codename = "unknown"
            _info_str = _info['os_image']
            if _info_str.lower().startswith('ubuntu'):
                _n, _v, _ = _info_str.split(maxsplit=2)
                _v, _, _ = _v.rpartition('.') if '.' in _v else (_v, "", "")
                if _v in ubuntu_versions:
                    _codename = ubuntu_versions[_v].split()[0].lower()
            elif _info_str.lower().startswith('debian'):
                parts = _info_str.split()
                for part in parts:
                    if part.startswith("(") and part.endswith(")"):
                        _codename = part.strip("()").lower()
            self.nodes[_name]['linux_codename'] = _codename

            # Consider per-data type transfer
            self.nodes[_name]["raw"] = _nodes[_name]
        # TODO: Investigate how to handle domains in Kube, probably - skip
        # _domains = list(_domains)
        # if len(_domains) > 1:
        #     logger_cli.warning(
        #         "Multiple domains detected: {}".format(",".join(_domains))
        #     )
        # else:
        self.domain = "no.domain.in.kube.yet"
        logger_cli.info(
            "-> {} nodes collected: {} - active, {} - not active".format(
                len(self.nodes),
                len(self._active),
                len(self.skip_list)
            )
        )

        _role = "control-plane"
        _filtered = [n for n, v in self.nodes.items()
                     if 'control-plane' in str(v['labels'])]
        if len(_filtered) < 1:
            raise KubeException(
                "No {} nodes detected! "
                "Check/Update node role map.".format(_role)
            )
        else:
            _r = [n for n, v in self.nodes.items()
                  if v['status'] != NODE_UP and _role in v['roles']]
            if len(_r) > 0:
                logger_cli.warn(
                    "Master nodes are reporting 'NotReady':\n{}".format(
                        "\n".join(_r)
                    )
                )
            self.kube.master_node = _filtered[0]

        # get specific data upfront
        # OpenStack versions
        self.mcp_release = ""
        # Quick and Dirty way to detect OS release
        try:
            _nova_version = self.kube.exec_on_target_pod(
                "nova-manage --version",
                "nova-api-osapi",
                "openstack"
            )
            _nmajor = _nova_version.partition('.')[0]
            self.openstack_release = nova_openstack_versions[_nmajor]
        except KubeException as e:
            logger_cli.warn("Openstack not detected: {}".format(e.message))
            self.openstack_release = nova_openstack_versions["00"]

        return

    @staticmethod
    def _get_ssh_shell(_h, _u, _k, _p, _q, _pipe, timeout=15):
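        # _h: host, _u: user, _k: keypath, _p: port,
        # _q: silent flag, _pipe: piped flag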
        _ssh = SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe,
            timeout=timeout
        )
        return _ssh.connect()

    @staticmethod
    def _do_ssh_cmd(_cmd, _h, _u, _k, _p, _q, _pipe, timeout=None):
        with SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe
        ) as ssh:
            if timeout is None:
                _r = ssh.do(_cmd)
            else:
                _r = ssh.do(_cmd, timeout=timeout)
            logger_cli.debug("'{}'".format(_r))
        return _r

    def node_shell(
        self,
        node,
        silent=True,
        piped=True,
        use_sudo=True,
        fport=None
    ):
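        # Returns a [port_forward, shell] pair; port_forward is None
        # when a direct connection is possible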
        _u = self.env_config.kube_node_user
        _k = self.env_config.kube_node_keypath

        _n = self.nodes[node]
        _h = _n['altip'] if "altip" in _n else _n['internalip']
        _p = 22
        if self.kube.is_local or self.kube.config.ssh_direct:
            logger.debug("Getting shell with no port forward")
            return [None, self._get_ssh_shell(
                _h, _u, _k, _p, silent, piped,
                timeout=self.kube.config.ssh_connect_timeout
            )]
        else:
            logger.debug("Getting shell with port forward")
            _fh = "localhost"
            _p = 10022 if not fport else fport
            _pfwd = PortForward(
                self.env_config.ssh_host,
                _h,
                user=_u,
                keypath=self.env_config.ssh_key,
                loc_port=_p,
                timeout=self.kube.config.ssh_connect_timeout
            )
            _pfwd.connect()
            _ssh = self._get_ssh_shell(
                _fh,
                _u,
                _k,
                _p,
                silent,
                piped,
                timeout=self.kube.config.ssh_connect_timeout
            )
            return [_pfwd, _ssh]

    def execute_script_on_node(self, node, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        # get result; node_shell returns a [port_forward, shell] pair,
        # so run the script through the shell and close both afterwards
        _fwd_sh, _sh = self.node_shell(node)
        _nr = _sh.do(
            "python {} {}".format(_target_path, _script_arguments)
        ) if _sh else None
        if _sh:
            _sh.kill()
        if _fwd_sh:
            _fwd_sh.kill()

        if not _nr:
            self.not_responded.append(node)
            return {}
        else:
            return {node: _nr}

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        logger_cli.debug("... running '{}' on active nodes".format(cmd))
        # handle results for each node
        self.not_responded = []
        _r = {}
        # TODO: Use threading and pool
        for node in self._active:
            # node_shell returns a [port_forward, shell] pair
            _fwd_sh, _sh = self.node_shell(node)
            _nr = _sh.do(cmd) if _sh else None
            if _sh:
                _sh.kill()
            if _fwd_sh:
                _fwd_sh.kill()

            if not _nr:
                self.not_responded.append(node)
            else:
                _r[node] = _nr

        return _r

    def _ssh_exec_script(self, params):
        """
        Threadsafe method to get shell to node,
        check/copy script and get results
        [
            node_name,
            src_path,
            tgt_path,
            conf,
            args
        ]
        """
        _timeout = self.kube.config.script_execution_timeout
        _name = params[0]
        _src = params[1]
        _tgt = params[2]
        _conf = params[3]
        _args = params[4]
        _port = params[5]
        _log_name = "["+_name+"]:"
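        # shell snippet that echoes True/False depending on whether
        # the given path exists and is non-empty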
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            _name,
            use_sudo=False,
            fport=_port
        )
        # check python3
        _python = _sh.do("which python3")
        _python = utils.to_bool(
            _sh.do(_check.format(_python))
        )
        if not _python:
            _sh.do("apt install python3", sudo=True, timeout=_timeout)
        # check if script already there
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            _conf.kube_scripts_folder
        )
        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger.info("{} Syncing file".format(_log_name))
        _code, _r, _e = _sh.scp(
            _src,
            _sh.get_host_path(_tgt),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "{} Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_log_name, _r, _e)
            )

        # execute script
        logger.debug("{} Running script".format(_log_name))
        _out = _sh.do(
            "python3 {}{}".format(
                _tgt,
                _args
            ),
            sudo=True,
            timeout=_timeout
        )

        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()

        return [_name, _out]

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare script
        _source_path = os.path.join(pkg_dir, 'scripts', script_filename)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        if _script_arguments:
            _script_arguments = " " + _script_arguments
        self.not_responded = []
        _results = {}
        logger_cli.debug(
            "... running '{}' on active nodes, {} worker threads".format(
                script_filename,
                self.env_config.threads
            )
        )
        # Workers pool
        pool = Pool(self.env_config.threads)

        # init the parameters
        # node_name,
        # src_path,
        # tgt_path,
        # conf,
        # args
        _params = []
        _port = 10022
        for node in self._active:
            # build parameter blocks
            _p_list = [
                node,
                _source_path,
                _target_path,
                self.env_config,
                _script_arguments,
                _port
            ]
            _params.append(_p_list)
            _port += 1

        _progress = Progress(len(_params))
        results = pool.imap_unordered(self._ssh_exec_script, _params)

        for ii in enumerate(results, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results[ii[1][0]] = ii[1][1]
            _progress.write_progress(ii[0])

        _progress.end()
        pool.close()
        pool.join()

        # return collected per-node results
        return _results

    def prepare_json_on_node(self, node, _dict, filename):
        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()

        _source_path = create_temp_file_with_content(_dumps)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            filename
        )
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder
        )
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            node,
            use_sudo=False
        )

        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger_cli.debug(
            "... create data on node '{}':'{}'".format(node, _target_path)
        )
        _code, _r, _e = _sh.scp(
            _source_path,
            _sh.get_host_path(_target_path),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_r, _e)
            )

        _fwd_sh.kill()
        _sh.kill()
        return _target_path

    def prepare_daemonset(self, template_filename):
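        """Render a DaemonSet from a template, mounting the scripts
        configmap into the pod spec, and create it in the QA namespace.
        """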
        # load template
        _yaml_file = os.path.join(pkg_dir, 'templates', template_filename)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _ds = {}
        with open(_yaml_file) as dsFile:
            _ds = yaml.load(dsFile, Loader=yaml.SafeLoader)

        # Add scripts to pod template as volumeMounts
        _tspec = _ds['spec']['template']['spec']
        _tspec['containers'][0]['volumeMounts'] = [
            {
                "name": "scripts",
                "mountPath": os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder
                )
            }
        ]

        _tspec['volumes'] = [
            {
                "name": "scripts",
                "configMap": {
                    "name": self._configmap_name
                }
            }
        ]

        # create daemonset
        logger_cli.debug("... preparing daemonset")
        _ds = self.kube.prepare_daemonset_from_yaml(self._namespace, _ds)
        # Save prepared daemonset
        self.prepared_daemonsets.append(_ds)
        # return it
        return _ds

    def wait_for_daemonset(self, ds, timeout=120):
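        """Poll daemonset status until its pods are scheduled, ready
        and up-to-date, or until `timeout` seconds have passed.
        """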
        # iteration timeout
        _sleep_time = 5
        _timeout = timeout

        # query daemonset and check that desired=scheduled=ready
        _ds = self.kube.get_daemon_set_by_name(
            ds.metadata.namespace,
            ds.metadata.name
        )

        _total = len(self.nodes) - len(self.skip_list)
        # _scheduled = _ds.status.scheduled
        # _ready = _ds.status.ready

        # Init Progress bar to show daemonset readiness
        _progress = Progress(_total)
        while _timeout > 0:
            # get new status
            _ds = self.kube.get_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
            _desired = _ds.status.desired_number_scheduled
            _scheduled = _ds.status.current_number_scheduled
            _ready = _ds.status.number_ready
            _updated = _ds.status.updated_number_scheduled
            # print it
            _progress.write_progress(
                _ready,
                note="desired: {}, scheduled: {}, ready: {},"
                " up-to-date: {}".format(
                    _desired,
                    _scheduled,
                    _ready,
                    _updated
                )
            )

            # check values and return
            # In case of Update, also checking _updated value
            if _ready == _updated and _ready == _total - len(self.skip_list):
                # close progress bar class
                _progress.end()
                logger_cli.debug("... daemonset is ready")
                return True
            # iterate
            _timeout -= _sleep_time
            # wait
            sleep(_sleep_time)

        # timed out
        _progress.end()
        # log it
        logger_cli.error("Timed out waiting for Daemonset to be ready")
        return False

    def exec_script_on_target_pod(self, pod_name, script_filename, args=None):
        """
        Run script from configmap on target pod assuming it is present
        """
        # args must stay a list here, as it is concatenated to _cmd
        _arguments = args if args else []
        _cmd = [
            "python3",
            os.path.join(
                "/",
                self.env_config.kube_scripts_folder,
                script_filename
            )
        ] + _arguments
        _result = self.kube.exec_on_target_pod(
            _cmd,
            pod_name,
            self._namespace,
            strict=True
        )
        return _result

    def exec_cmd_on_target_pod(self, pod_name, ns, command_str):
        """
        Run cmd on target pod
        """
        _result = self.kube.exec_on_target_pod(
            command_str,
            pod_name,
            ns,
            strict=True
        )
        return _result

    def execute_cmd_on_daemon_set(
        self,
        ds,
        cmd,
        _args=None,
        is_script=False
    ):
        """
        Query daemonset for pods and execute script on all of them
        """
        _results = self.exec_cmd_on_pods(
            self.kube.get_pods_for_daemonset(ds),
            cmd,
            _args=_args,
            is_script=is_script
        )
        # Update results
        _ds_results = {}
        # only node name and result are needed;
        # pod name and cmd are ignored
        for _n, _, _v, _ in _results:
            _ds_results[_n] = _v
        return _ds_results

    def exec_on_labeled_pods_and_ns(
        self,
        label_str,
        cmd,
        _args=None,
        ns=None,
        silent=False
    ):
        if not ns:
            ns = self._namespace
        _results = self.exec_cmd_on_pods(
            self.kube.list_pods(ns, label_str=label_str),
            cmd,
            _args=_args,
            silent=silent
        )
        _pod_results = {}
        # results carry [node, pod, output, cmd]; keep pod and output
        for _, _p, _v, _ in _results:
            _pod_results[_p] = _v
        return _pod_results

    def _pooled_exec_on_pod(self, plist, silent=False):
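        # Each plist entry is
        # [self, node_name, namespace, pod_name, cmd, args];
        # the worker returns [node_name, pod_name, output, cmd]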
        def _kube_exec_on_pod(plist):
            return [
                plist[1],  # node
                plist[3],  # pod name
                plist[0].kube.exec_on_target_pod(  # pointer to function
                    plist[4],  # cmd
                    plist[3],  # pod name
                    plist[2],  # namespace
                    strict=True,
                    _request_timeout=120,
                    arguments=plist[5]
                ),
                # save cmd used
                plist[4]
            ]
        # map func and cmd
        pool = Pool(self.env_config.threads)
        _results = []
        self.not_responded = []
        # create result list
        if not silent:
            _progress = Progress(len(plist))
        ret = pool.imap_unordered(_kube_exec_on_pod, plist)

        for ii in enumerate(ret, start=1):
            # result list is [node, pod, output, cmd];
            # empty output means the node did not respond
            if not ii[1][2]:
                self.not_responded.append(ii[1][0])
            else:
                _results.append(ii[1])
            if not silent:
                _progress.write_progress(ii[0])

        if not silent:
            _progress.end()
        pool.close()
        pool.join()
        logger_cli.debug(
            "... done, {} total outputs; {} not responded".format(
                len(_results),
                len(self.not_responded)
            )
        )
        return _results

    def exec_cmd_on_pods(
        self,
        pod_list,
        cmd,
        _args=None,
        is_script=False,
        silent=False
    ):
        # Create map for threads: [[node_name, ns, pod_name, cmd]...]
        logger_cli.debug(
            "... running script on {} pods using {} threads at a time".format(
                len(pod_list.items),
                self.env_config.threads
            )
        )
        _plist = []
        _arguments = _args if _args else ""
        if is_script:
            _cmd = [
                "python3",
                os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder,
                    cmd
                ),
                _arguments
            ]
            _cmd = " ".join(_cmd)
        else:
            # decide if we are to wrap it to bash
            if '|' in cmd:
                _cmd = "bash -c"
                _arguments = cmd
            else:
                _cmd = cmd
        for item in pod_list.items:
            _plist.append(
                [
                    self,
                    item.spec.node_name,
                    item.metadata.namespace,
                    item.metadata.name,
                    _cmd,
                    _arguments
                ]
            )

        return self._pooled_exec_on_pod(_plist, silent=silent)

    def exec_cmds_on_pod(self, pod, cmd_list):
        logger_cli.debug(
            "... running {} cmds using {} threads at a time".format(
                len(cmd_list),
                self.env_config.threads
            )
        )
        _plist = []
        # decide if we are to wrap it to bash
        for item in cmd_list:
            if '|' in item:
                _cmd = "bash -c"
                _arguments = item
            else:
                _cmd = item
                _arguments = ""
            _plist.append(
                [
                    self,
                    pod.spec.node_name,
                    pod.metadata.namespace,
                    pod.metadata.name,
                    _cmd,
                    _arguments
                ]
            )

        return self._pooled_exec_on_pod(_plist)

    def delete_daemonset(self, ds):
        # Try to delete daemonset
        try:
            _r = self.kube.delete_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
        except Exception as e:
            logger_cli.warning("Failed to delete daemonset '{}': {}".format(
                ds.metadata.name,
                e.reason
            ))
            _r = None
        return _r

    def get_pod_name_in_daemonset_by_node(self, nodename, daemonset):
        _podname = None
        _pods = self.kube.get_pods_for_daemonset(daemonset)
        for item in _pods.items:
            if item.spec.node_name == nodename:
                _podname = item.metadata.name

        return _podname

    def prepare_json_in_pod(self, podname, namespace, targets, filename):
        # Iterate pods in daemonset and prepare json file on each one
        _target_path = os.path.join(
            "/",
            "tmp",
            filename
        )
        # checking the folder is probably not needed as the daemonset
        # links the configmap there on creation
        # _folder = os.path.join(
        #     self.env_config.kube_node_homepath,
        #     self.env_config.kube_scripts_folder
        # )
        # prepare data
        buffer = json.dumps(targets, indent=2).encode('utf-8')

        # write data to pod using fancy websocket function
        self.kube.put_string_buffer_to_pod_as_textfile(
            podname,
            namespace,
            buffer,
            _target_path
        )

        # TODO: Exception handling

        return _target_path

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs command on daemonset and parses result into place
        or into dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        # Dirty way to get daemonset that was used in checker and not deleted
        _ds = self.prepared_daemonsets[0]
        _result = self.execute_cmd_on_daemon_set(_ds, cmd)
        for node, data in _nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

    def prepare_benchmark_agent(self, index, path, sc, size, template):
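        """Render a benchmark agent pod "cfgagent-<index>" from the
        template and back it with a PVC of the given class and size.
        """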
        # Load pod template
        _yaml_file = os.path.join(pkg_dir, 'templates', template)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _pod = {}
        with open(_yaml_file) as podFile:
            _pod = yaml.load(podFile, Loader=yaml.SafeLoader)

        # set namings
        _n = "cfgagent-{:02}".format(index)
        _pvc_n = "cfgagent-pvc-{:02}".format(index)
        # _pv_n = "cfgagent-pv-{:02}".format(index)

        _pod["metadata"]["name"] = _n
        _pod["metadata"]["labels"]["name"] = _n
        # replace volumeMounts
        for _c in _pod["spec"]["containers"]:
            for _mnt in _c["volumeMounts"]:
                if "placeholder" in _mnt["name"]:
                    # _mnt["name"] = _pv_n
                    _mnt["mountPath"] = path
        # replace claim
        for _v in _pod["spec"]["volumes"]:
            if "cfgagent-pv" in _v["name"]:
                # _v["name"] = _pv_n
                _v["persistentVolumeClaim"]["claimName"] = _pvc_n

        # init volume resources
        # _pv_object = self.kube.init_pv_resource(_pv_n, sc, size, path)
        # _pv = self.kube.prepare_pv(_pv_object)
        # update size of the volume to be 15% larger
        _pvc_object = self.kube.init_pvc_resource(_pvc_n, sc, size)
        _pvc = self.kube.prepare_pvc(_pvc_object)

        # start pod
        _pod = self.kube.prepare_pod_from_yaml(_pod)

        # return _pod, _pv, _pvc
        return _pod, _pvc

    def expose_benchmark_agent(self, agent):
        return self.kube.expose_pod_port(agent, 8765)

    def cleanup_resource_by_name(self, res_type, name, ns=None, wait=False):
        """Cleans up a resource using string res_type and the ns/name

        Args:
            res_type (string): resource type name: pod, pv, pvc, svc
            name (string): resource name to cleanup
            ns (string, optional): Namespace to use. Default is 'qa-space'

        return: (Bool) Is Success?
        """
        # fill defaults
        if not ns:
            ns = self._namespace
        # Handle res_type errors and choose resource type
        if not res_type:
            logger_cli.debug(
                "... resource type invalid: '{}'".format(res_type)
            )
            return False
        elif not name:
            logger_cli.debug("... resource name invalid: '{}'".format(name))
            return False
        elif res_type == "svc":
            # Delete service
            logger_cli.info("-> deleting svc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_service(name, ns)
            # TODO: Check if successful
        elif res_type == "pod":
            # Delete a pod
            logger_cli.info("-> deleting pod {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_pod(name, ns)
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pvc":
            logger_cli.info("-> deleting pvc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_persistent_volume_claim(
                name,
                ns
            )
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pv":
            logger_cli.info("-> deleting pv {}/{}".format(ns, name))
            self.kube.CoreV1.delete_persistent_volume(name)
            if wait:
                self.kube.wait_for_phase(res_type, name, None, ["Terminated"])

        return True

    def get_resource_phase_by_name(self, typ, name, ns="qa-space"):
        if typ == "pod":
            _t = self.kube.get_pod_by_name_and_ns(name, ns)
        elif typ == "svc":
            _t = self.kube.get_svc_by_name_and_ns(name, ns)
        elif typ == "pvc":
            _t = self.kube.get_pvc_by_name_and_ns(name, ns)
        elif typ == "pv":
            _t = self.kube.get_pv_by_name(name)
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None

        if _t:
            return _t.status.phase
        else:
            return None

    def list_resource_names_by_type_and_ns(self, typ, ns="qa-space"):
        if typ == "pod":
            _items = self.kube.list_pods(ns)
        elif typ == "svc":
            _items = self.kube.list_svc(ns)
        elif typ == "pvc":
            _items = self.kube.list_pvc(ns)
        elif typ == "pv":
            _items = self.kube.list_pv()
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None
        return [[i.metadata.namespace, i.metadata.name] for i in _items.items]

    def list_pod_names_with_containers(self, ns="qa-space", running_only=True):
        _result = []
        _pods = self.kube.list_pods(ns)
        if not running_only:
            for i in _pods.items:
                _result.append([
                    i.metadata.namespace,
                    i.metadata.name,
                    [c.name for c in i.spec.containers]
                ])
        else:
            for i in _pods.items:
                if i.status.phase == "Running":
                    _result.append([
                        i.metadata.namespace,
                        i.metadata.name,
                        [c.name for c in i.status.container_statuses
                         if c.state.running is not None]
                    ])
        return _result

    def get_logs_for_pod(self, podname, container, namespace, tail_lines):
        try:
            return self.kube.get_pod_logs(
                podname,
                container,
                namespace,
                tail_lines=tail_lines
            )
        except KubeException as e:
            logger_cli.warning(
                "WARNING: Log retrieval failed: '{}'".format(e.message)
            )
            return ""

    def list_namespaces(self):
        return [i.metadata.name for i in self.kube.list_namespaces().items]