Alex0989ecf2022-03-29 13:43:21 -05001# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
2# Copyright 2019-2022 Mirantis, Inc.
Alexe0c5b9e2019-04-23 18:51:23 -05003import json
Alex Savatieiev9b2f6512019-02-20 18:05:00 -06004import os
Alex1f90e7b2021-09-03 15:31:28 -05005import yaml
Alex3ebc5632019-04-18 16:47:18 -05006from copy import deepcopy
Alex9a4ad212020-10-01 18:04:25 -05007from multiprocessing.dummy import Pool
Alex1f90e7b2021-09-03 15:31:28 -05008from time import sleep
Alex Savatieiev9b2f6512019-02-20 18:05:00 -06009
Alex9a4ad212020-10-01 18:04:25 -050010from cfg_checker.clients import get_salt_remote, get_kube_remote
11from cfg_checker.common.const import all_salt_roles_map, all_kube_roles_map
Alexe9908f72020-05-19 16:04:53 -050012from cfg_checker.common.const import NODE_UP, NODE_DOWN, NODE_SKIP
Alex9a4ad212020-10-01 18:04:25 -050013from cfg_checker.common.const import ubuntu_versions, nova_openstack_versions
Alex7c9494e2019-04-22 10:40:59 -050014from cfg_checker.common import logger, logger_cli
Alexe0c5b9e2019-04-23 18:51:23 -050015from cfg_checker.common import utils
Alex9a4ad212020-10-01 18:04:25 -050016from cfg_checker.common.file_utils import create_temp_file_with_content
17from cfg_checker.common.exception import SaltException, KubeException
18from cfg_checker.common.ssh_utils import PortForward, SshShell
19from cfg_checker.common.settings import pkg_dir, ENV_TYPE_KUBE, ENV_TYPE_SALT
20from cfg_checker.helpers.console_utils import Progress
21
Alex Savatieiev9b2f6512019-02-20 18:05:00 -060022
23node_tmpl = {
24 'role': '',
25 'node_group': '',
Alexe9908f72020-05-19 16:04:53 -050026 'status': NODE_DOWN,
Alex Savatieiev9b2f6512019-02-20 18:05:00 -060027 'pillars': {},
Alex9a4ad212020-10-01 18:04:25 -050028 'grains': {},
29 'raw': {}
Alex Savatieiev9b2f6512019-02-20 18:05:00 -060030}
31
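# Illustrative only: after gather_node_info() a Salt entry based on this
# template might look like (values are hypothetical, not from a real cloud):
#   {'role': 'compute', 'node_group': 'cmp', 'status': NODE_UP,
#    'pillars': {...}, 'grains': {...}, 'raw': {},
#    'shortname': 'cmp001', 'linux_codename': 'bionic', 'linux_arch': 'amd64'}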
32
Alex9a4ad212020-10-01 18:04:25 -050033def _prepare_skipped_nodes(_names, skip_list, skip_list_file):
34 _skipped_minions = []
35 # skip list file
36 if skip_list_file:
37 _valid, _invalid = utils.get_nodes_list(skip_list_file)
Alex9a4ad212020-10-01 18:04:25 -050038 _skipped_minions.extend(_valid)
Alex359e5752021-08-16 17:28:30 -050039 if len(_invalid) > 0:
40 logger_cli.info(
41 "\n# WARNING: Detected invalid entries "
42 "in nodes skip list:\n{}\n".format(
43 "\n".join(_invalid)
44 )
45 )
Alexe8643642021-08-23 14:08:46 -050046
Alex9a4ad212020-10-01 18:04:25 -050047 # process wildcard, create node list out of mask
48 if skip_list:
49 _list = []
50 _invalid = []
51 for _item in skip_list:
52 if '*' in _item:
53 _str = _item[:_item.index('*')]
54 _nodes = [_m for _m in _names if _m.startswith(_str)]
55 if not _nodes:
56 logger_cli.warn(
57 "# WARNING: No nodes found for {}".format(_item)
58 )
59 _list.extend(_nodes)
60 else:
61 if _item in _names:
62 _list.append(_item)
63 else:
64 logger_cli.warn(
65 "# WARNING: No node found for {}".format(_item)
66 )
67 # removing duplicates
68 _list = list(set(_list))
69 _skipped_minions.extend(_list)
70
71 return _skipped_minions
72
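# A minimal usage sketch for the helper above (names and masks are
# illustrative, not taken from a real environment):
#
#   _names = ["cmp001.example.local", "cmp002.example.local", "ctl01.example.local"]
#   _prepare_skipped_nodes(_names, ["cmp*"], None)
#   # -> matched names with duplicates removed, here cmp001/cmp002;
#   # '*' masks are expanded by prefix match, exact entries must be present
#   # in _names, otherwise a warning is logged and the entry is dropped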
73
74class Nodes(object):
75 def __init__(self, config):
76 self.nodes = None
77 self.env_config = config
78
79 def skip_node(self, node):
80 # Add node to skip list
81 # For example, if it fails to comply with the rules
82
83 # check if we know such node
84 if node in self.nodes.keys() and node not in self.skip_list:
85 # yes, add it
86 self.skip_list.append(node)
87 return True
88 else:
89 return False
90
91 def get_nodes(self, skip_list=None, skip_list_file=None):
92 if not self.nodes:
93 if not skip_list and self.env_config.skip_nodes:
94 self.gather_node_info(
95 self.env_config.skip_nodes,
96 skip_list_file
97 )
98 else:
99 self.gather_node_info(skip_list, skip_list_file)
100 return self.nodes
101
102 def get_info(self):
103 _info = {
104 'mcp_release': self.mcp_release,
105 'openstack_release': self.openstack_release
106 }
107 return _info
108
109 def is_node_available(self, node, log=True):
110 if node in self.skip_list:
111 if log:
112 logger_cli.info("-> node '{}' not active".format(node))
113 return False
114 elif node in self.not_responded:
115 if log:
116 logger_cli.info("-> node '{}' not responded".format(node))
117 return False
118 else:
119 return True
120
121
122class SaltNodes(Nodes):
123 def __init__(self, config):
124 super(SaltNodes, self).__init__(config)
Alexe0c5b9e2019-04-23 18:51:23 -0500125 logger_cli.info("# Gathering environment information")
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600126 # simple salt rest client
Alex9a4ad212020-10-01 18:04:25 -0500127 self.salt = None
128 self.env_type = ENV_TYPE_SALT
Alex3ebc5632019-04-18 16:47:18 -0500129
Alexe9908f72020-05-19 16:04:53 -0500130 def gather_node_info(self, skip_list, skip_list_file):
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600131 # Keys for all nodes
132 # this is not working in scope of 2016.8.3, will override with list
Alexb151fbe2019-04-22 16:53:30 -0500133 logger_cli.debug("... collecting node names existing in the cloud")
Alexe0c5b9e2019-04-23 18:51:23 -0500134 if not self.salt:
Alex9a4ad212020-10-01 18:04:25 -0500135 self.salt = get_salt_remote(self.env_config)
Alexe0c5b9e2019-04-23 18:51:23 -0500136
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600137 try:
138 _keys = self.salt.list_keys()
139 _str = []
Alex3bc95f62020-03-05 17:00:04 -0600140 for _k, _v in _keys.items():
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600141 _str.append("{}: {}".format(_k, len(_v)))
142 logger_cli.info("-> keys collected: {}".format(", ".join(_str)))
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600143
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600144 self.node_keys = {
145 'minions': _keys['minions']
146 }
Alex3ebc5632019-04-18 16:47:18 -0500147 except Exception:
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600148 _keys = None
149 self.node_keys = None
Alex3ebc5632019-04-18 16:47:18 -0500150
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600151 # List of minions with grains
152 _minions = self.salt.list_minions()
153 if _minions:
Alex3ebc5632019-04-18 16:47:18 -0500154 logger_cli.info(
155 "-> api reported {} active minions".format(len(_minions))
156 )
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600157 elif not self.node_keys:
158 # this is the last resort
Alex9a4ad212020-10-01 18:04:25 -0500159 _minions = self.env_config.load_nodes_list()
Alex3ebc5632019-04-18 16:47:18 -0500160 logger_cli.info(
161 "-> {} nodes loaded from list file".format(len(_minions))
162 )
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600163 else:
164 _minions = self.node_keys['minions']
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600165
Alexe9908f72020-05-19 16:04:53 -0500166 # Skip nodes if needed
Alex9a4ad212020-10-01 18:04:25 -0500167 _skipped_minions = \
168 _prepare_skipped_nodes(_minions, skip_list, skip_list_file)
Alexe9908f72020-05-19 16:04:53 -0500169
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600170 # in case the API did not list minions, we need all that answer ping
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600171 _active = self.salt.get_active_nodes()
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600172 logger_cli.info("-> nodes responded: {}".format(len(_active)))
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600173 # iterate through all accepted nodes and create a dict for it
174 self.nodes = {}
Alex Savatieievefa79c42019-03-14 19:14:04 -0500175 self.skip_list = []
Alexe9908f72020-05-19 16:04:53 -0500176 _domains = set()
Alex Savatieiev9df93a92019-02-27 17:40:16 -0600177 for _name in _minions:
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600178 _nc = utils.get_node_code(_name)
Alex9a4ad212020-10-01 18:04:25 -0500179 _rmap = all_salt_roles_map
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600180 _role = _rmap[_nc] if _nc in _rmap else 'unknown'
Alexe9908f72020-05-19 16:04:53 -0500181 if _name in _skipped_minions:
182 _status = NODE_SKIP
Alex Savatieievefa79c42019-03-14 19:14:04 -0500183 self.skip_list.append(_name)
Alexe9908f72020-05-19 16:04:53 -0500184 else:
185 _status = NODE_UP if _name in _active else NODE_DOWN
186 if _status == NODE_DOWN:
187 self.skip_list.append(_name)
188 logger_cli.info(
189 "-> '{}' is down, "
190 "added to skip list".format(
191 _name
192 )
193 )
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600194 self.nodes[_name] = deepcopy(node_tmpl)
Alexe9908f72020-05-19 16:04:53 -0500195 self.nodes[_name]['shortname'] = _name.split(".", 1)[0]
196 _domains.add(_name.split(".", 1)[1])
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600197 self.nodes[_name]['node_group'] = _nc
198 self.nodes[_name]['role'] = _role
199 self.nodes[_name]['status'] = _status
Alexe9908f72020-05-19 16:04:53 -0500200 _domains = list(_domains)
201 if len(_domains) > 1:
202 logger_cli.warning(
203 "Multiple domains detected: {}".format(",".join(_domains))
204 )
Alex205546c2020-12-30 19:22:30 -0600205 # TODO: Use domain with biggest node count by default
206 # or force it via config option
Alexe9908f72020-05-19 16:04:53 -0500207 else:
208 self.domain = _domains[0]
Alex Savatieievefa79c42019-03-14 19:14:04 -0500209 logger_cli.info("-> {} nodes inactive".format(len(self.skip_list)))
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600210 logger_cli.info("-> {} nodes collected".format(len(self.nodes)))
211
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600212 # form an all nodes compound string to use in salt
Alex Savatieiev01f0d7f2019-03-07 17:53:29 -0600213 self.active_nodes_compound = self.salt.compound_string_from_list(
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600214 filter(
Alexe9908f72020-05-19 16:04:53 -0500215 lambda nd: self.nodes[nd]['status'] == NODE_UP,
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600216 self.nodes
217 )
218 )
Alex41485522019-04-12 17:26:18 -0500219 # get master node fqdn
Alex3bc95f62020-03-05 17:00:04 -0600220 # _filtered = filter(
221 # lambda nd: self.nodes[nd]['role'] == const.all_roles_map['cfg'],
222 # self.nodes
223 # )
Alex9a4ad212020-10-01 18:04:25 -0500224 _role = all_salt_roles_map['cfg']
Alex3bc95f62020-03-05 17:00:04 -0600225 _filtered = [n for n, v in self.nodes.items() if v['role'] == _role]
Alexe0c5b9e2019-04-23 18:51:23 -0500226 if len(_filtered) < 1:
227 raise SaltException(
228 "No master node detected! Check/Update node role map."
229 )
230 else:
231 self.salt.master_node = _filtered[0]
Alex3ebc5632019-04-18 16:47:18 -0500232
Alex41485522019-04-12 17:26:18 -0500233 # OpenStack versions
234 self.mcp_release = self.salt.pillar_get(
Alexe0c5b9e2019-04-23 18:51:23 -0500235 self.salt.master_node,
Alex41485522019-04-12 17:26:18 -0500236 "_param:apt_mk_version"
Alexe0c5b9e2019-04-23 18:51:23 -0500237 )[self.salt.master_node]
Alex41485522019-04-12 17:26:18 -0500238 self.openstack_release = self.salt.pillar_get(
Alexe0c5b9e2019-04-23 18:51:23 -0500239 self.salt.master_node,
Alex41485522019-04-12 17:26:18 -0500240 "_param:openstack_version"
Alexe0c5b9e2019-04-23 18:51:23 -0500241 )[self.salt.master_node]
Alexd0391d42019-05-21 18:48:55 -0500242 # Preload codenames
243 # do additional queries to get linux codename and arch for each node
244 self.get_specific_pillar_for_nodes("_param:linux_system_codename")
245 self.get_specific_pillar_for_nodes("_param:linux_system_architecture")
246 for _name in self.nodes.keys():
Alexe9547d82019-06-03 15:22:50 -0500247 _n = self.nodes[_name]
248 if _name not in self.skip_list:
249 _p = _n['pillars']['_param']
250 _n['linux_codename'] = _p['linux_system_codename']
251 _n['linux_arch'] = _p['linux_system_architecture']
Alex41485522019-04-12 17:26:18 -0500252
Alex1839bbf2019-08-22 17:17:21 -0500253 def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
Alex836fac82019-08-22 13:36:16 -0500254 """Function runs cmd.run and parses the result into place
255 or into dict structure provided
256
257 :return: no return value, data published internally
258 """
259 logger_cli.debug(
260 "... collecting results for '{}'".format(cmd)
261 )
262 if target_dict:
263 _nodes = target_dict
264 else:
265 _nodes = self.nodes
Alex1839bbf2019-08-22 17:17:21 -0500266 _result = self.execute_cmd_on_active_nodes(cmd, nodes=nodes)
Alex3bc95f62020-03-05 17:00:04 -0600267 for node, data in _nodes.items():
Alexf3dbe862019-10-07 15:17:04 -0500268
Alex836fac82019-08-22 13:36:16 -0500269 if node in self.skip_list:
270 logger_cli.debug(
271 "... '{}' skipped while collecting '{}'".format(
272 node,
273 cmd
274 )
275 )
276 continue
277 # Prepare target key
278 if target_key not in data:
279 data[target_key] = None
280 # Save data
Alexe9908f72020-05-19 16:04:53 -0500281 if data['status'] in [NODE_DOWN, NODE_SKIP]:
Alex836fac82019-08-22 13:36:16 -0500282 data[target_key] = None
Alex1839bbf2019-08-22 17:17:21 -0500283 elif node not in _result:
284 continue
Alex836fac82019-08-22 13:36:16 -0500285 elif not _result[node]:
286 logger_cli.debug(
287 "... '{}' not responded after '{}'".format(
288 node,
Alex9a4ad212020-10-01 18:04:25 -0500289 self.env_config.salt_timeout
Alex836fac82019-08-22 13:36:16 -0500290 )
291 )
292 data[target_key] = None
293 else:
294 data[target_key] = _result[node]
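
# Illustrative call (the target key name is hypothetical): collect kernel
# release for every active node straight into self.nodes, so that
# self.nodes[<name>]['kernel'] holds the raw `uname -r` output or None:
#
#   self.get_cmd_for_nodes("uname -r", "kernel")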
295
Alex Savatieiev01f0d7f2019-03-07 17:53:29 -0600296 def get_specific_pillar_for_nodes(self, pillar_path):
297 """Function gets pillars on given path for all nodes
298
299 :return: no return value, data published internally
300 """
Alex3ebc5632019-04-18 16:47:18 -0500301 logger_cli.debug(
Alexb151fbe2019-04-22 16:53:30 -0500302 "... collecting node pillars for '{}'".format(pillar_path)
Alex3ebc5632019-04-18 16:47:18 -0500303 )
Alex Savatieiev01f0d7f2019-03-07 17:53:29 -0600304 _result = self.salt.pillar_get(self.active_nodes_compound, pillar_path)
Alex Savatieievefa79c42019-03-14 19:14:04 -0500305 self.not_responded = []
Alex3bc95f62020-03-05 17:00:04 -0600306 for node, data in self.nodes.items():
Alex Savatieievefa79c42019-03-14 19:14:04 -0500307 if node in self.skip_list:
308 logger_cli.debug(
309 "... '{}' skipped while collecting '{}'".format(
310 node,
311 pillar_path
312 )
313 )
314 continue
Alex Savatieiev01f0d7f2019-03-07 17:53:29 -0600315 _pillar_keys = pillar_path.split(':')
316 _data = data['pillars']
317 # pre-create nested dict
318 for idx in range(0, len(_pillar_keys)-1):
319 _key = _pillar_keys[idx]
320 if _key not in _data:
321 _data[_key] = {}
322 _data = _data[_key]
Alexe9908f72020-05-19 16:04:53 -0500323 if data['status'] in [NODE_DOWN, NODE_SKIP]:
Alex Savatieievefa79c42019-03-14 19:14:04 -0500324 _data[_pillar_keys[-1]] = None
325 elif not _result[node]:
326 logger_cli.debug(
327 "... '{}' not responded after '{}'".format(
328 node,
Alex9a4ad212020-10-01 18:04:25 -0500329 self.env_config.salt_timeout
Alex Savatieievefa79c42019-03-14 19:14:04 -0500330 )
331 )
332 _data[_pillar_keys[-1]] = None
333 self.not_responded.append(node)
334 else:
335 _data[_pillar_keys[-1]] = _result[node]
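
# Shape sketch (illustrative): for pillar_path "_param:linux_system_codename"
# the nested-dict pre-creation above leaves each node with
#   self.nodes[<name>]['pillars']['_param']['linux_system_codename']
# set to the pillar value, or None for skipped/unresponsive nodes.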
Alex3ebc5632019-04-18 16:47:18 -0500336
Alexe0c5b9e2019-04-23 18:51:23 -0500337 def prepare_json_on_node(self, node, _dict, filename):
Alex359e5752021-08-16 17:28:30 -0500338 if node in self.skip_list:
339 logger_cli.debug(
340 "... '{}' skipped while preparing json file of '{}'".format(
341 node,
342 filename
343 )
344 )
345
Alexe0c5b9e2019-04-23 18:51:23 -0500346 # this function assumes that all folders are created
347 _dumps = json.dumps(_dict, indent=2).splitlines()
348 _storage_path = os.path.join(
Alex9a4ad212020-10-01 18:04:25 -0500349 self.env_config.salt_file_root, self.env_config.salt_scripts_folder
Alexe0c5b9e2019-04-23 18:51:23 -0500350 )
351 logger_cli.debug(
352 "... uploading data as '{}' "
353 "to master's file cache folder: '{}'".format(
354 filename,
355 _storage_path
356 )
357 )
358 _cache_path = os.path.join(_storage_path, filename)
359 _source_path = os.path.join(
360 'salt://',
Alex9a4ad212020-10-01 18:04:25 -0500361 self.env_config.salt_scripts_folder,
Alexe0c5b9e2019-04-23 18:51:23 -0500362 filename
363 )
364 _target_path = os.path.join(
365 '/root',
Alex9a4ad212020-10-01 18:04:25 -0500366 self.env_config.salt_scripts_folder,
Alexe0c5b9e2019-04-23 18:51:23 -0500367 filename
368 )
369
370 logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
371 self.salt.f_touch_master(_cache_path)
372 self.salt.f_append_master(_cache_path, _dumps)
373 logger.debug("... syncing file to '{}'".format(node))
374 self.salt.get_file(
375 node,
376 _source_path,
377 _target_path,
378 tgt_type="compound"
379 )
380 return _target_path
381
382 def prepare_script_on_active_nodes(self, script_filename):
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600383 # Prepare script
384 _p = os.path.join(pkg_dir, 'scripts', script_filename)
385 with open(_p, 'rt') as fd:
386 _script = fd.read().splitlines()
387 _storage_path = os.path.join(
Alex9a4ad212020-10-01 18:04:25 -0500388 self.env_config.salt_file_root, self.env_config.salt_scripts_folder
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600389 )
390 logger_cli.debug(
Alexb151fbe2019-04-22 16:53:30 -0500391 "... uploading script {} "
Alex3ebc5632019-04-18 16:47:18 -0500392 "to master's file cache folder: '{}'".format(
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600393 script_filename,
394 _storage_path
395 )
396 )
Alexe0c5b9e2019-04-23 18:51:23 -0500397 self.salt.mkdir(self.salt.master_node, _storage_path)
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600398 # Form cache, source and target path
399 _cache_path = os.path.join(_storage_path, script_filename)
400 _source_path = os.path.join(
401 'salt://',
Alex9a4ad212020-10-01 18:04:25 -0500402 self.env_config.salt_scripts_folder,
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600403 script_filename
404 )
405 _target_path = os.path.join(
406 '/root',
Alex9a4ad212020-10-01 18:04:25 -0500407 self.env_config.salt_scripts_folder,
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600408 script_filename
409 )
410
Alexb151fbe2019-04-22 16:53:30 -0500411 logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
Alex3ebc5632019-04-18 16:47:18 -0500412 self.salt.f_touch_master(_cache_path)
413 self.salt.f_append_master(_cache_path, _script)
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600414 # command salt to copy file to minions
Alex3ebc5632019-04-18 16:47:18 -0500415 logger_cli.debug(
Alexb151fbe2019-04-22 16:53:30 -0500416 "... creating script target folder '{}'".format(
Alex3ebc5632019-04-18 16:47:18 -0500417 _cache_path
418 )
419 )
420 self.salt.mkdir(
Alex Savatieiev01f0d7f2019-03-07 17:53:29 -0600421 self.active_nodes_compound,
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600422 os.path.join(
423 '/root',
Alex9a4ad212020-10-01 18:04:25 -0500424 self.env_config.salt_scripts_folder
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600425 ),
426 tgt_type="compound"
427 )
Alex3ebc5632019-04-18 16:47:18 -0500428 logger.debug("... syncing file to nodes")
429 self.salt.get_file(
Alex Savatieiev01f0d7f2019-03-07 17:53:29 -0600430 self.active_nodes_compound,
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600431 _source_path,
432 _target_path,
433 tgt_type="compound"
434 )
Alexe0c5b9e2019-04-23 18:51:23 -0500435 # return path on nodes, just in case
436 return _target_path
437
438 def execute_script_on_node(self, node, script_filename, args=[]):
439 # Prepare path
440 _target_path = os.path.join(
441 '/root',
Alex9a4ad212020-10-01 18:04:25 -0500442 self.env_config.salt_scripts_folder,
Alexe0c5b9e2019-04-23 18:51:23 -0500443 script_filename
444 )
445
446 # execute script
447 logger.debug("... running script on '{}'".format(node))
448 # handle results for each node
449 _script_arguments = " ".join(args) if args else ""
450 self.not_responded = []
451 _r = self.salt.cmd(
452 node,
453 'cmd.run',
454 param='python {} {}'.format(_target_path, _script_arguments),
455 expr_form="compound"
456 )
457
458 # all-false return values mean that there was no response
459 self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
460 return _r
461
Alex1f90e7b2021-09-03 15:31:28 -0500462 def execute_script_on_active_nodes(self, script_filename, args=None):
Alexe0c5b9e2019-04-23 18:51:23 -0500463 # Prepare path
464 _target_path = os.path.join(
465 '/root',
Alex9a4ad212020-10-01 18:04:25 -0500466 self.env_config.salt_scripts_folder,
Alexe0c5b9e2019-04-23 18:51:23 -0500467 script_filename
468 )
469
470 # execute script
Alexd0391d42019-05-21 18:48:55 -0500471 logger_cli.debug("... running script")
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600472 # handle results for each node
Alex1f90e7b2021-09-03 15:31:28 -0500473 _script_arguments = args if args else ""
Alex Savatieievefa79c42019-03-14 19:14:04 -0500474 self.not_responded = []
475 _r = self.salt.cmd(
Alex Savatieiev01f0d7f2019-03-07 17:53:29 -0600476 self.active_nodes_compound,
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600477 'cmd.run',
478 param='python {} {}'.format(_target_path, _script_arguments),
479 expr_form="compound"
480 )
481
Alex Savatieievefa79c42019-03-14 19:14:04 -0500482 # all-false return values mean that there was no response
Alex3ebc5632019-04-18 16:47:18 -0500483 self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
Alex Savatieievefa79c42019-03-14 19:14:04 -0500484 return _r
Alex Savatieiev9b2f6512019-02-20 18:05:00 -0600485
Alex1839bbf2019-08-22 17:17:21 -0500486 def execute_cmd_on_active_nodes(self, cmd, nodes=None):
Alex836fac82019-08-22 13:36:16 -0500487 # execute cmd
488 self.not_responded = []
489 _r = self.salt.cmd(
Alex1839bbf2019-08-22 17:17:21 -0500490 nodes if nodes else self.active_nodes_compound,
Alex836fac82019-08-22 13:36:16 -0500491 'cmd.run',
492 param=cmd,
493 expr_form="compound"
494 )
495
496 # all-false return values mean that there was no response
497 self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
498 return _r
499
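# A hedged end-to-end sketch of driving the Salt flavour above (the config
# object and script name are placeholders, not fixed by this module):
#
#   nodes = SaltNodes(env_config)
#   nodes.get_nodes()
#   nodes.prepare_script_on_active_nodes("some_check.py")
#   _out = nodes.execute_script_on_active_nodes("some_check.py")
#   # _out maps minion id -> raw stdout; unresponsive minions are tracked
#   # in nodes.not_responded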
Alex9a4ad212020-10-01 18:04:25 -0500500
501class KubeNodes(Nodes):
502 def __init__(self, config):
503 super(KubeNodes, self).__init__(config)
504 logger_cli.info("# Gathering environment information")
505 # simple kube rest client
506 self.kube = get_kube_remote(self.env_config)
507 self.env_type = ENV_TYPE_KUBE
Alex1f90e7b2021-09-03 15:31:28 -0500508 self._namespace = "qa-space"
509 self._configmap_name = self.env_config.kube_scripts_folder
510
511 # prepare needed resources
Alexb78191f2021-11-02 16:35:46 -0500512 self.prepared_daemonsets = []
Alex0bcf31b2022-03-29 17:38:58 -0500513 # Check if we need resources prepared
514 if not config.prepare_qa_resources:
515 logger_cli.debug("... skipped preparing resources")
516 self._scripts = None
517 return
518 else:
519 self._check_namespace()
520 self._scripts = self._check_config_map()
Alex1f90e7b2021-09-03 15:31:28 -0500521
522 def _check_namespace(self):
523 # ensure namespace
524 logger_cli.debug(
525 "... checking namespace '{}'".format(self._namespace)
526 )
527 if not self.kube.ensure_namespace(self._namespace):
528 raise KubeException(
529 "Failed to manage namespace '{}'".format(self._namespace)
530 )
531
532 def _check_config_map(self):
533 # ensure config map exists
534 logger_cli.debug(
535 "... checking config map '{}'".format(self._configmap_name)
536 )
537 _source = os.path.join(pkg_dir, 'scripts')
538 return self.kube.create_config_map(
539 self._namespace,
540 self._configmap_name,
541 _source
542 )
Alex9a4ad212020-10-01 18:04:25 -0500543
544 def gather_node_info(self, skip_list, skip_list_file):
545 # Gather nodes info and query pod lists for each node
546 logger_cli.debug("... collecting node names existing in the cloud")
547
548 # Gather node names and info
549 _nodes = self.kube.get_node_info()
550 _node_names = list(_nodes.keys())
551 # Skip nodes if needed
552 _skipped_nodes = \
553 _prepare_skipped_nodes(_node_names, skip_list, skip_list_file)
554
555 # Count how many nodes are active
556 self._active = [n for n, v in _nodes.items()
557 if v['conditions']['ready']['status']]
558
559 # iterate through all accepted nodes and create a dict for it
560 self.nodes = {}
561 self.skip_list = []
Alex9a4ad212020-10-01 18:04:25 -0500562 for _name in _node_names:
563 if _name in _skipped_nodes:
564 _status = NODE_SKIP
565 self.skip_list.append(_name)
566 else:
567 _status = NODE_UP if _name in self._active else NODE_DOWN
568 if _status == NODE_DOWN:
569 self.skip_list.append(_name)
570 logger_cli.info(
571 "-> '{}' shows 'Ready' as 'False', "
572 "added to skip list".format(
573 _name
574 )
575 )
576 _roles = {}
577 _labels = {}
578 for _label, _value in _nodes[_name]['labels'].items():
579 if _label in all_kube_roles_map:
580 _roles[all_kube_roles_map[_label]] = _value
581 else:
582 _labels[_label] = _value
583
584 self.nodes[_name] = deepcopy(node_tmpl)
585 self.nodes[_name].pop("grains")
586 self.nodes[_name].pop("pillars")
587
588 # hostname
589 self.nodes[_name]['shortname'] = \
590 _nodes[_name]['addresses']['hostname']['address']
Alexe4de1142022-11-04 19:26:03 -0500591 # internal
Alex9a4ad212020-10-01 18:04:25 -0500592 self.nodes[_name]['internalip'] = \
593 _nodes[_name]['addresses']['internalip']['address']
Alexe4de1142022-11-04 19:26:03 -0500594 # alternate
595 if self.env_config.force_node_network is not None:
596 iIP = self.nodes[_name]['internalip']
597 # use last number
598 aIP = self.env_config.force_node_network + iIP.split('.')[-1]
599 self.nodes[_name]["altip"] = aIP
Alex9a4ad212020-10-01 18:04:25 -0500600 self.nodes[_name]['node_group'] = None
601 self.nodes[_name]['labels'] = _labels
602 self.nodes[_name]['roles'] = _roles
603 self.nodes[_name]['status'] = _status
604 # Backward compatibility
605 _info = _nodes[_name]['status']['node_info']
606 self.nodes[_name]['linux_image'] = _info['os_image']
607 self.nodes[_name]['linux_arch'] = _info['architecture']
608
609 _codename = "unknown"
610 _n, _v, _c = _info['os_image'].split()
611 if _n.lower() == 'ubuntu':
612 _v, _, _ = _v.rpartition('.') if '.' in _v else (_v, "", "")
613 if _v in ubuntu_versions:
614 _codename = ubuntu_versions[_v].split()[0].lower()
615 self.nodes[_name]['linux_codename'] = _codename
616
617 # Consider per-data type transfer
618 self.nodes[_name]["raw"] = _nodes[_name]
619 # TODO: Investigate how to handle domains in Kube, probably - skip
620 # _domains = list(_domains)
621 # if len(_domains) > 1:
622 # logger_cli.warning(
623 # "Multiple domains detected: {}".format(",".join(_domains))
624 # )
625 # else:
Alex1f90e7b2021-09-03 15:31:28 -0500626 self.domain = "no.domain.in.kube.yet"
Alex9a4ad212020-10-01 18:04:25 -0500627 logger_cli.info(
628 "-> {} nodes collected: {} - active, {} - not active".format(
629 len(self.nodes),
630 len(self._active),
631 len(self.skip_list)
632 )
633 )
634
635 _role = "k8s-master"
636 _filtered = [n for n, v in self.nodes.items() if _role in v['roles']]
637 if len(_filtered) < 1:
638 raise KubeException(
639 "No k8s-master nodes detected! Check/Update node role map."
640 )
Alex Savatieievefa79c42019-03-14 19:14:04 -0500641 else:
Alex9a4ad212020-10-01 18:04:25 -0500642 _r = [n for n, v in self.nodes.items()
643 if v['status'] != NODE_UP and _role in v['roles']]
644 if len(_r) > 0:
645 logger_cli.warn(
646 "Master nodes are reporting 'NotReady':\n{}".format(
647 "\n".join(_r)
648 )
649 )
650 self.kube.master_node = _filtered[0]
Alexe0c5b9e2019-04-23 18:51:23 -0500651
Alex9a4ad212020-10-01 18:04:25 -0500652 # get specific data upfront
653 # OpenStack versions
654 self.mcp_release = ""
655 # Quick and Dirty way to detect OS release
Alexccb72e02021-01-20 16:38:03 -0600656 try:
657 _nova_version = self.kube.exec_on_target_pod(
658 "nova-manage --version",
659 "nova-api-osapi",
660 "openstack"
661 )
662 _nmajor = _nova_version.partition('.')[0]
663 self.openstack_release = nova_openstack_versions[_nmajor]
664 except KubeException as e:
665 logger_cli.warn("Openstack not detected: {}".format(e.message))
666 self.openstack_release = nova_openstack_versions["00"]
Alexe0c5b9e2019-04-23 18:51:23 -0500667
Alex9a4ad212020-10-01 18:04:25 -0500668 return
669
670 @staticmethod
Alex1f90e7b2021-09-03 15:31:28 -0500671 def _get_ssh_shell(_h, _u, _k, _p, _q, _pipe, timeout=15):
Alex9a4ad212020-10-01 18:04:25 -0500672 _ssh = SshShell(
673 _h,
674 user=_u,
675 keypath=_k,
676 port=_p,
677 silent=_q,
Alex1f90e7b2021-09-03 15:31:28 -0500678 piped=_pipe,
679 timeout=timeout
Alex9a4ad212020-10-01 18:04:25 -0500680 )
681 return _ssh.connect()
682
683 @staticmethod
Alex1f90e7b2021-09-03 15:31:28 -0500684 def _do_ssh_cmd(_cmd, _h, _u, _k, _p, _q, _pipe, timeout=None):
Alex9a4ad212020-10-01 18:04:25 -0500685 with SshShell(
686 _h,
687 user=_u,
688 keypath=_k,
689 port=_p,
690 silent=_q,
691 piped=_pipe
692 ) as ssh:
Alex1f90e7b2021-09-03 15:31:28 -0500693 if timeout is None:
694 _r = ssh.do(_cmd)
695 else:
696 _r = ssh.do(_cmd, timeout=timeout)
Alex9a4ad212020-10-01 18:04:25 -0500697 logger_cli.debug("'{}'".format(_r))
698 return _r
699
700 def node_shell(
701 self,
702 node,
703 silent=True,
704 piped=True,
705 use_sudo=True,
706 fport=None
707 ):
708 _u = self.env_config.kube_node_user
709 _k = self.env_config.kube_node_keypath
Alexe4de1142022-11-04 19:26:03 -0500710
711 _n = self.nodes[node]
712 _h = _n['altip'] if "altip" in _n else _n['internalip']
Alex9a4ad212020-10-01 18:04:25 -0500713 _p = 22
Alexeffa0682021-06-04 12:18:33 -0500714 if self.kube.is_local or self.kube.config.ssh_direct:
Alexf6ec91b2021-09-10 10:11:17 -0500715 logger.debug("Getting shell with no port forward")
716 return [None, self._get_ssh_shell(
Alex1f90e7b2021-09-03 15:31:28 -0500717 _h, _u, _k, _p, silent, piped,
718 timeout=self.kube.config.ssh_connect_timeout
Alexf6ec91b2021-09-10 10:11:17 -0500719 )]
Alex9a4ad212020-10-01 18:04:25 -0500720 else:
Alexf6ec91b2021-09-10 10:11:17 -0500721 logger.debug("Getting shell with port forward")
Alex9a4ad212020-10-01 18:04:25 -0500722 _fh = "localhost"
723 _p = 10022 if not fport else fport
724 _pfwd = PortForward(
725 self.env_config.ssh_host,
726 _h,
727 user=_u,
728 keypath=self.env_config.ssh_key,
Alex1f90e7b2021-09-03 15:31:28 -0500729 loc_port=_p,
730 timeout=self.kube.config.ssh_connect_timeout
Alex9a4ad212020-10-01 18:04:25 -0500731 )
732 _pfwd.connect()
Alex1f90e7b2021-09-03 15:31:28 -0500733 _ssh = self._get_ssh_shell(
734 _fh,
735 _u,
736 _k,
737 _p,
738 silent,
739 piped,
740 timeout=self.kube.config.ssh_connect_timeout
741 )
Alexf6ec91b2021-09-10 10:11:17 -0500742 return [_pfwd, _ssh]
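
# Usage sketch (node name is hypothetical): node_shell() always returns a
# [port_forward_or_None, shell] pair and the caller owns both ends:
#
#   _fwd, _sh = self.node_shell("cmp001", use_sudo=False)
#   _out = _sh.do("uname -r")
#   if _fwd:
#       _fwd.kill()
#   _sh.kill()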
Alex9a4ad212020-10-01 18:04:25 -0500743
744 def execute_script_on_node(self, node, script_filename, args=[]):
745 # Prepare path
746 _target_path = os.path.join(
Alexccb72e02021-01-20 16:38:03 -0600747 self.env_config.kube_node_homepath,
Alex9a4ad212020-10-01 18:04:25 -0500748 self.env_config.kube_scripts_folder,
749 script_filename
750 )
751
752 # execute script
753 logger_cli.debug("... running script on '{}'".format(node))
754 # handle results for each node
755 _script_arguments = " ".join(args) if args else ""
756 self.not_responded = []
757 # get result
758 _nr = self.node_shell(
759 node,
760 "python {} {}".format(
761 _target_path,
762 _script_arguments
763 )
764 )
765
766 if not _nr:
767 self.not_responded.append(node)
768 return {}
769 else:
770 return {node: _nr}
771
772 def execute_cmd_on_active_nodes(self, cmd, nodes=None):
773 # execute script
774 logger_cli.debug("...running '{}' on active nodes".format(cmd))
775 # handle results for each node
776 self.not_responded = []
777 _r = {}
778 # TODO: Use threading and pool
779 for node in self._active:
780 _nr = self.node_shell(
781 node,
782 cmd
783 )
784
785 if not _nr:
786 self.not_responded.append(node)
787 else:
788 _r[node] = _nr
789
790 return _r
791
Alex1f90e7b2021-09-03 15:31:28 -0500792 def _ssh_exec_script(self, params):
Alex9a4ad212020-10-01 18:04:25 -0500793 """
794 Threadsafe method to get shell to node,
795 check/copy script and get results
796 [
797 node_name,
798 src_path,
799 tgt_path,
800 conf,
801 args
802 ]
803 """
Alex1f90e7b2021-09-03 15:31:28 -0500804 _timeout = self.kube.config.script_execution_timeout
Alex9a4ad212020-10-01 18:04:25 -0500805 _name = params[0]
806 _src = params[1]
807 _tgt = params[2]
808 _conf = params[3]
809 _args = params[4]
810 _port = params[5]
811 _log_name = "["+_name+"]:"
812 _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
813 "else echo False; fi)"
814 _fwd_sh, _sh = self.node_shell(
815 _name,
816 use_sudo=False,
817 fport=_port
818 )
819 # check python3
820 _python = _sh.do("which python3")
821 _python = utils.to_bool(
822 _sh.do(_check.format(_python))
823 )
824 if not _python:
Alex1f90e7b2021-09-03 15:31:28 -0500825 _sh.do("apt install python3", sudo=True, timeout=_timeout)
Alex9a4ad212020-10-01 18:04:25 -0500826 # check if script already there
827 _folder = os.path.join(
Alexccb72e02021-01-20 16:38:03 -0600828 self.env_config.kube_node_homepath,
Alex9a4ad212020-10-01 18:04:25 -0500829 _conf.kube_scripts_folder
830 )
831 # check if folder exists
832 _folder_exists = utils.to_bool(
833 _sh.do(_check.format(_folder))
834 )
835 if not _folder_exists:
836 _sh.do("mkdir " + _folder)
837 logger.info("{} Syncing file".format(_log_name))
838 _code, _r, _e = _sh.scp(
839 _src,
840 _sh.get_host_path(_tgt),
841 )
842 # handle error code
843 if _code:
844 logger_cli.warn(
845 "{} Error in scp:\n"
846 "\tstdout:'{}'\n"
847 "\tstderr:'{}'".format(_log_name, _r, _e)
848 )
849
850 # execute script
851 logger.debug("{} Running script".format(_log_name))
852 _out = _sh.do(
853 "python3 {}{}".format(
854 _tgt,
855 _args
856 ),
Alex1f90e7b2021-09-03 15:31:28 -0500857 sudo=True,
858 timeout=_timeout
Alex9a4ad212020-10-01 18:04:25 -0500859 )
860
861 if _fwd_sh:
862 _fwd_sh.kill()
863 _sh.kill()
864
865 return [_name, _out]
866
Alex1f90e7b2021-09-03 15:31:28 -0500867 def execute_script_on_active_nodes(self, script_filename, args=None):
Alex9a4ad212020-10-01 18:04:25 -0500868 # Prepare script
869 _source_path = os.path.join(pkg_dir, 'scripts', script_filename)
870 _target_path = os.path.join(
Alexccb72e02021-01-20 16:38:03 -0600871 self.env_config.kube_node_homepath,
Alex9a4ad212020-10-01 18:04:25 -0500872 self.env_config.kube_scripts_folder,
873 script_filename
874 )
875 # handle results for each node
876 _script_arguments = " ".join(args) if args else ""
877 if _script_arguments:
878 _script_arguments = " " + _script_arguments
879 self.not_responded = []
880 _results = {}
881 logger_cli.debug(
Alexc4f59622021-08-27 13:42:00 -0500882 "... running '{}' on active nodes, {} worker threads".format(
Alex9a4ad212020-10-01 18:04:25 -0500883 script_filename,
884 self.env_config.threads
885 )
886 )
887 # Workers pool
888 pool = Pool(self.env_config.threads)
889
890 # init the parameters
891 # node_name,
892 # src_path,
893 # tgt_path,
894 # conf,
895 # args
896 _params = []
897 _port = 10022
898 for node in self._active:
899 # build parameter blocks
900 _p_list = [
901 node,
902 _source_path,
903 _target_path,
904 self.env_config,
905 _script_arguments,
906 _port
907 ]
908 _params.append(_p_list)
909 _port += 1
910
911 _progress = Progress(len(_params))
Alex1f90e7b2021-09-03 15:31:28 -0500912 results = pool.imap_unordered(self._ssh_exec_script, _params)
Alex9a4ad212020-10-01 18:04:25 -0500913
914 for ii in enumerate(results, start=1):
915 if not ii[1][1]:
916 self.not_responded.append(ii[1][0])
917 else:
918 _results[ii[1][0]] = ii[1][1]
919 _progress.write_progress(ii[0])
920
921 _progress.end()
922 pool.close()
923 pool.join()
924
925 # return per-node script outputs
926 return _results
927
928 def prepare_json_on_node(self, node, _dict, filename):
929 # this function assumes that all folders are created
930 _dumps = json.dumps(_dict, indent=2).splitlines()
931
932 _source_path = create_temp_file_with_content(_dumps)
933 _target_path = os.path.join(
Alexccb72e02021-01-20 16:38:03 -0600934 self.env_config.kube_node_homepath,
Alex9a4ad212020-10-01 18:04:25 -0500935 self.env_config.kube_scripts_folder,
936 filename
937 )
938 _folder = os.path.join(
Alexccb72e02021-01-20 16:38:03 -0600939 self.env_config.kube_node_homepath,
Alex9a4ad212020-10-01 18:04:25 -0500940 self.env_config.kube_scripts_folder
941 )
942 _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
943 "else echo False; fi)"
944 _fwd_sh, _sh = self.node_shell(
945 node,
946 use_sudo=False
947 )
948
949 # check if folder exists
950 _folder_exists = utils.to_bool(
951 _sh.do(_check.format(_folder))
952 )
953 if not _folder_exists:
954 _sh.do("mkdir " + _folder)
955 logger_cli.debug(
Alexc4f59622021-08-27 13:42:00 -0500956 "... create data on node '{}':'{}'".format(node, _target_path)
Alex9a4ad212020-10-01 18:04:25 -0500957 )
958 _code, _r, _e = _sh.scp(
959 _source_path,
960 _sh.get_host_path(_target_path),
961 )
962 # handle error code
963 if _code:
964 logger_cli.warn(
965 "Error in scp:\n"
966 "\tstdout:'{}'\n"
967 "\tstderr:'{}'".format(_r, _e)
968 )
969
970 _fwd_sh.kill()
971 _sh.kill()
972 return _target_path
Alex1f90e7b2021-09-03 15:31:28 -0500973
Alex7b0ee9a2021-09-21 17:16:17 -0500974 def prepare_daemonset(self, template_filename):
Alex1f90e7b2021-09-03 15:31:28 -0500975 # load template
976 _yaml_file = os.path.join(pkg_dir, 'templates', template_filename)
977 logger_cli.debug("... loading template '{}'".format(_yaml_file))
978 _ds = {}
979 with open(_yaml_file) as dsFile:
980 _ds = yaml.load(dsFile, Loader=yaml.SafeLoader)
981
982 # Add scripts to pod template as volumeMounts
983 _tspec = _ds['spec']['template']['spec']
984 _tspec['containers'][0]['volumeMounts'] = [
985 {
986 "name": "scripts",
987 "mountPath": os.path.join(
988 "/",
989 self.env_config.kube_scripts_folder
990 )
991 }
992 ]
993
994 _tspec['volumes'] = [
995 {
996 "name": "scripts",
997 "configMap": {
998 "name": self._configmap_name
999 }
1000 }
1001 ]
1002
1003 # create daemonset
1004 logger_cli.debug("... preparing daemonset")
Alexb78191f2021-11-02 16:35:46 -05001005 _ds = self.kube.prepare_daemonset_from_yaml(self._namespace, _ds)
1006 # Save prepared daemonset
1007 self.prepared_daemonsets.append(_ds)
1008 # return it
1009 return _ds
Alex1f90e7b2021-09-03 15:31:28 -05001010
1011 def wait_for_daemonset(self, ds, timeout=120):
1012 # iteration timeout
1013 _sleep_time = 5
1014 _timeout = timeout
1015
1016 # query daemonset and check that desired=scheduled=ready
1017 _ds = self.kube.get_daemon_set_by_name(
1018 ds.metadata.namespace,
1019 ds.metadata.name
1020 )
1021
Alex0bcf31b2022-03-29 17:38:58 -05001022 _total = len(self.nodes) - len(self.skip_list)
Alex1f90e7b2021-09-03 15:31:28 -05001023 # _scheduled = _ds.status.scheduled
1024 # _ready = _ds.status.ready
1025
1026 # Init Progress bar to show daemonset readiness
1027 _progress = Progress(_total)
1028 while _timeout > 0:
1029 # get new status
1030 _ds = self.kube.get_daemon_set_by_name(
1031 ds.metadata.namespace,
1032 ds.metadata.name
1033 )
1034 _desired = _ds.status.desired_number_scheduled
1035 _scheduled = _ds.status.current_number_scheduled
1036 _ready = _ds.status.number_ready
1037 _updated = _ds.status.updated_number_scheduled
1038 # print it
1039 _progress.write_progress(
1040 _ready,
1041 note="desired: {}, scheduled: {}, ready: {},"
1042 " up-to-date: {}".format(
1043 _desired,
1044 _scheduled,
1045 _ready,
1046 _updated
1047 )
1048 )
1049
1050 # check values and return
1051 # In case of Update, also checking _updated value
Alex163aa042022-12-01 11:58:32 -06001052 if _ready == _updated and _ready == _total:  # _total already excludes skip_list
Alex1f90e7b2021-09-03 15:31:28 -05001053 # close progress bar class
1054 _progress.end()
1055 logger_cli.debug("... daemonset is ready")
1056 return True
1057 # iterate
1058 _timeout -= _sleep_time
1059 # wait
1060 sleep(_sleep_time)
1061
1062 # timed out
1063 _progress.end()
1064 # log it
1065 logger_cli.error("Timed out waiting for Daemonset to be ready")
1066 return False
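
# A hedged lifecycle sketch tying the daemonset helpers together (the
# template filename is a placeholder; real templates live in pkg templates/):
#
#   _ds = self.prepare_daemonset("some_daemonset.yaml")
#   if self.wait_for_daemonset(_ds):
#       _out = self.execute_cmd_on_daemon_set(_ds, "uname -r")
#       # _out maps node name -> command output
#   self.delete_daemonset(_ds)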
1067
Alexdcb792f2021-10-04 14:24:21 -05001068 def exec_script_on_target_pod(self, pod_name, script_filename, args=None):
Alex7b0ee9a2021-09-21 17:16:17 -05001069 """
1070 Run script from configmap on target pod assuming it is present
1071 """
1072 _arguments = args if args else []
1073 _cmd = [
1074 "python3",
1075 os.path.join(
1076 "/",
1077 self.env_config.kube_scripts_folder,
1078 script_filename
1079 )
1080 ] + _arguments
1081 _result = self.kube.exec_on_target_pod(
1082 _cmd,
1083 pod_name,
1084 self._namespace,
1085 strict=True
1086 )
1087 return _result
1088
Alexdcb792f2021-10-04 14:24:21 -05001089 def exec_cmd_on_target_pod(self, pod_name, ns, command_str):
1090 """
Alex0bcf31b2022-03-29 17:38:58 -05001091 Run cmd on target pod
1092
Alexdcb792f2021-10-04 14:24:21 -05001093 """
1094 _result = self.kube.exec_on_target_pod(
1095 command_str,
1096 pod_name,
1097 ns,
1098 strict=True
1099 )
1100 return _result
1101
Alexb78191f2021-11-02 16:35:46 -05001102 def execute_cmd_on_daemon_set(
1103 self,
1104 ds,
1105 cmd,
Alexb2129542021-11-23 15:49:42 -06001106 _args=None,
Alexb78191f2021-11-02 16:35:46 -05001107 is_script=False
1108 ):
Alex1f90e7b2021-09-03 15:31:28 -05001109 """
1110 Query daemonset for pods and execute script on all of them
1111 """
Alexb2129542021-11-23 15:49:42 -06001112 _results = self.exec_cmd_on_pods(
1113 self.kube.get_pods_for_daemonset(ds),
1114 cmd,
1115 _args=_args,
1116 is_script=is_script
1117 )
1118 # Update results
1119 _ds_results = {}
Alex0bcf31b2022-03-29 17:38:58 -05001120 # only node name and result is needed
1121 # pod name and cmd ignored
1122 for _n, _, _v, _ in _results:
Alexb2129542021-11-23 15:49:42 -06001123 _ds_results[_n] = _v
1124 return _ds_results
1125
Alexe4de1142022-11-04 19:26:03 -05001126 def exec_on_labeled_pods_and_ns(
1127 self,
1128 label_str,
1129 cmd,
1130 _args=None,
1131 ns=None,
1132 silent=False
1133 ):
Alexb2129542021-11-23 15:49:42 -06001134 if not ns:
1135 ns = self._namespace
1136 _results = self.exec_cmd_on_pods(
1137 self.kube.list_pods(ns, label_str=label_str),
1138 cmd,
Alexe4de1142022-11-04 19:26:03 -05001139 _args=_args,
1140 silent=silent
Alexb2129542021-11-23 15:49:42 -06001141 )
1142 _pod_results = {}
1143 for _, _p, _v in _results:
1144 _pod_results[_p] = _v
1145 return _pod_results
1146
Alex0bcf31b2022-03-29 17:38:58 -05001147 def _pooled_exec_on_pod(self, plist, silent=False):
Alex1f90e7b2021-09-03 15:31:28 -05001148 def _kube_exec_on_pod(plist):
1149 return [
1150 plist[1], # node
1151 plist[3], # pod name
1152 plist[0].kube.exec_on_target_pod( # pointer to function
1153 plist[4], # cmd
1154 plist[3], # pod name
1155 plist[2], # namespace
1156 strict=True,
1157 _request_timeout=120,
Alexb78191f2021-11-02 16:35:46 -05001158 arguments=plist[5]
Alex0bcf31b2022-03-29 17:38:58 -05001159 ),
1160 # save cmd used
1161 plist[4]
Alex1f90e7b2021-09-03 15:31:28 -05001162 ]
Alex0bcf31b2022-03-29 17:38:58 -05001163 # map func and cmd
1164 pool = Pool(self.env_config.threads)
1165 _results = []
1166 self.not_responded = []
1167 # create result list
1168 if not silent:
1169 _progress = Progress(len(plist))
1170 ret = pool.imap_unordered(_kube_exec_on_pod, plist)
1171
1172 for ii in enumerate(ret, start=1):
1173 if not ii[1][1]:
1174 self.not_responded.append(ii[1][0])
1175 else:
1176 _results.append(ii[1])
1177 if not silent:
1178 _progress.write_progress(ii[0])
1179
1180 if not silent:
1181 _progress.end()
1182 pool.close()
1183 pool.join()
1184 logger_cli.debug(
1185 "... done, {} total outputs; {} not responded".format(
1186 len(_results),
1187 len(self.not_responded)
1188 )
1189 )
1190 return _results
1191
1192 def exec_cmd_on_pods(
1193 self,
1194 pod_list,
1195 cmd,
1196 _args=None,
1197 is_script=False,
1198 silent=False
1199 ):
Alex1f90e7b2021-09-03 15:31:28 -05001200
Alex1f90e7b2021-09-03 15:31:28 -05001201 # Create map for threads: [[node_name, ns, pod_name, cmd]...]
1202 logger_cli.debug(
1203 "... running script on {} pods using {} threads at a time".format(
Alexb2129542021-11-23 15:49:42 -06001204 len(pod_list.items),
Alex1f90e7b2021-09-03 15:31:28 -05001205 self.env_config.threads
1206 )
1207 )
1208 _plist = []
Alexb2129542021-11-23 15:49:42 -06001209 _arguments = _args if _args else ""
Alexb78191f2021-11-02 16:35:46 -05001210 if is_script:
1211 _cmd = [
1212 "python3",
1213 os.path.join(
1214 "/",
1215 self.env_config.kube_scripts_folder,
1216 cmd
1217 ),
1218 _arguments
1219 ]
1220 _cmd = " ".join(_cmd)
1221 else:
1222 # decide if we are to wrap it to bash
1223 if '|' in cmd:
1224 _cmd = "bash -c"
1225 _arguments = cmd
1226 else:
1227 _cmd = cmd
Alexb2129542021-11-23 15:49:42 -06001228 for item in pod_list.items:
Alex1f90e7b2021-09-03 15:31:28 -05001229 _plist.append(
1230 [
1231 self,
1232 item.spec.node_name,
1233 item.metadata.namespace,
1234 item.metadata.name,
Alexb78191f2021-11-02 16:35:46 -05001235 _cmd,
1236 _arguments
Alex1f90e7b2021-09-03 15:31:28 -05001237 ]
1238 )
1239
Alex0bcf31b2022-03-29 17:38:58 -05001240 return self._pooled_exec_on_pod(_plist, silent=silent)
Alex1f90e7b2021-09-03 15:31:28 -05001241
Alex0bcf31b2022-03-29 17:38:58 -05001242 def exec_cmds_on_pod(self, pod, cmd_list):
Alex1f90e7b2021-09-03 15:31:28 -05001243 logger_cli.debug(
Alex0bcf31b2022-03-29 17:38:58 -05001244 "... running {} cmds using {} threads at a time".format(
1245 len(cmd_list),
1246 self.env_config.threads
Alex1f90e7b2021-09-03 15:31:28 -05001247 )
1248 )
Alex0bcf31b2022-03-29 17:38:58 -05001249 _plist = []
1250 # decide if we are to wrap it to bash
1251 for item in cmd_list:
1252 if '|' in item:
1253 _cmd = "bash -c"
1254 _arguments = item
1255 else:
1256 _cmd = item
1257 _arguments = ""
1258 _plist.append(
1259 [
1260 self,
1261 pod.spec.node_name,
1262 pod.metadata.namespace,
1263 pod.metadata.name,
1264 _cmd,
1265 _arguments
1266 ]
1267 )
1268
1269 return self._pooled_exec_on_pod(_plist)
Alex1f90e7b2021-09-03 15:31:28 -05001270
1271 def delete_daemonset(self, ds):
1272 # Try to delete daemonset
1273 try:
1274 _r = self.kube.delete_daemon_set_by_name(
1275 ds.metadata.namespace,
1276 ds.metadata.name
1277 )
1278 except Exception as e:
1279 logger_cli.warning("Failed to delete daemonset '{}': {}".format(
1280 ds.metadata.name,
1281 e.reason
1282 ))
1283 _r = None
1284 return _r
Alex7b0ee9a2021-09-21 17:16:17 -05001285
1286 def get_pod_name_in_daemonset_by_node(self, nodename, daemonset):
1287 _podname = None
1288 _pods = self.kube.get_pods_for_daemonset(daemonset)
1289 for item in _pods.items:
1290 if item.spec.node_name == nodename:
1291 _podname = item.metadata.name
1292
1293 return _podname
1294
1295 def prepare_json_in_pod(self, podname, namespace, targets, filename):
1296 # Iterate pods in daemonset and prepare json file on each one
1297 _target_path = os.path.join(
1298 "/",
1299 "tmp",
1300 filename
1301 )
1302 # checking the folder is probably not needed as the daemonset links
1303 # configmap there on creation
1304 # _folder = os.path.join(
1305 # self.env_config.kube_node_homepath,
1306 # self.env_config.kube_scripts_folder
1307 # )
1308 # prepare data
1309 buffer = json.dumps(targets, indent=2).encode('utf-8')
1310
1311 # write data to pod using fancy websocket function
1312 self.kube.put_string_buffer_to_pod_as_textfile(
1313 podname,
1314 namespace,
1315 buffer,
1316 _target_path
1317 )
1318
1319 # TODO: Exception handling
1320
1321 return _target_path
Alexb78191f2021-11-02 16:35:46 -05001322
1323 def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
1324 """Function runs command on daemonset and parses result into place
1325 or into dict structure provided
1326
1327 :return: no return value, data published internally
1328 """
1329 logger_cli.debug(
1330 "... collecting results for '{}'".format(cmd)
1331 )
1332 if target_dict:
1333 _nodes = target_dict
1334 else:
1335 _nodes = self.nodes
1336 # Dirty way to get daemonset that was used in checker and not deleted
1337 _ds = self.prepared_daemonsets[0]
1338 _result = self.execute_cmd_on_daemon_set(_ds, cmd)
1339 for node, data in _nodes.items():
1340
1341 if node in self.skip_list:
1342 logger_cli.debug(
1343 "... '{}' skipped while collecting '{}'".format(
1344 node,
1345 cmd
1346 )
1347 )
1348 continue
1349 # Prepare target key
1350 if target_key not in data:
1351 data[target_key] = None
1352 # Save data
1353 if data['status'] in [NODE_DOWN, NODE_SKIP]:
1354 data[target_key] = None
1355 elif node not in _result:
1356 continue
1357 elif not _result[node]:
1358 logger_cli.debug(
1359 "... '{}' not responded after '{}'".format(
1360 node,
1361 self.env_config.salt_timeout
1362 )
1363 )
1364 data[target_key] = None
1365 else:
1366 data[target_key] = _result[node]
Alex5cace3b2021-11-10 16:40:37 -06001367
1368 def prepare_benchmark_agent(self, index, path, sc, size, template):
1369 # Load pod template
1370 _yaml_file = os.path.join(pkg_dir, 'templates', template)
1371 logger_cli.debug("... loading template '{}'".format(_yaml_file))
1372 _pod = {}
1373 with open(_yaml_file) as podFile:
1374 _pod = yaml.load(podFile, Loader=yaml.SafeLoader)
1375
1376 # set namings
1377 _n = "cfgagent-{:02}".format(index)
1378 _pvc_n = "cfgagent-pvc-{:02}".format(index)
Alex90ac1532021-12-09 11:13:14 -06001379 # _pv_n = "cfgagent-pv-{:02}".format(index)
Alex5cace3b2021-11-10 16:40:37 -06001380
1381 _pod["metadata"]["name"] = _n
1382 _pod["metadata"]["labels"]["name"] = _n
1383 # replace volumeMounts
1384 for _c in _pod["spec"]["containers"]:
1385 for _mnt in _c["volumeMounts"]:
1386 if "placeholder" in _mnt["name"]:
Alex90ac1532021-12-09 11:13:14 -06001387 # _mnt["name"] = _pv_n
Alex5cace3b2021-11-10 16:40:37 -06001388 _mnt["mountPath"] = path
1389 # replace claim
1390 for _v in _pod["spec"]["volumes"]:
Alex30380a42021-12-20 16:11:20 -06001391 if "cfgagent-pv" in _v["name"]:
Alex90ac1532021-12-09 11:13:14 -06001392 # _v["name"] = _pv_n
Alex5cace3b2021-11-10 16:40:37 -06001393 _v["persistentVolumeClaim"]["claimName"] = _pvc_n
1394
1395 # init volume resources
Alex90ac1532021-12-09 11:13:14 -06001396 # _pv_object = self.kube.init_pv_resource(_pv_n, sc, size, path)
1397 # _pv = self.kube.prepare_pv(_pv_object)
Alex30380a42021-12-20 16:11:20 -06001398 # update size of the volume to be 15% larger
Alex5cace3b2021-11-10 16:40:37 -06001399 _pvc_object = self.kube.init_pvc_resource(_pvc_n, sc, size)
1400 _pvc = self.kube.prepare_pvc(_pvc_object)
1401
1402 # start pod
1403 _pod = self.kube.prepare_pod_from_yaml(_pod)
1404
Alex90ac1532021-12-09 11:13:14 -06001405 # return _pod, _pv, _pvc
1406 return _pod, _pvc
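
# Sketch of pairing with expose_benchmark_agent below (storage class, size,
# mount path and template name are illustrative):
#
#   _pod, _pvc = self.prepare_benchmark_agent(
#       1, "/cache", "standard", "10Gi", "cfgagent-template.yaml"
#   )
#   _svc = self.expose_benchmark_agent(_pod)   # exposes port 8765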
Alex5cace3b2021-11-10 16:40:37 -06001407
1408 def expose_benchmark_agent(self, agent):
1409 return self.kube.expose_pod_port(agent, 8765)
Alex2a7657c2021-11-10 20:51:34 -06001410
1411 def cleanup_resource_by_name(self, res_type, name, ns=None, wait=False):
1412 """Cleans up a resource using string res_type and the ns/name
1413
1414 Args:
1415 res_type (string): resource type name: pod, pv, pvc, svc
1416 name (string): resource name to cleanup
1417 ns (string, optional): Namespace to use. Default is 'qa-space'
1418
1419 return: (Bool) Is Success?
1420 """
1421 # fill defaults
1422 if not ns:
1423 ns = self._namespace
1424 # Handle res_type errors and choose resource type
1425 if not res_type:
1426 logger_cli.debug(
1427 "... resource type invalid: '{}'".format(res_type)
1428 )
1429 return False
1430 elif not name:
1431 logger_cli.debug("... resource name invalid: '{}'".format(name))
1432 return False
1433 elif res_type == "svc":
1434 # Delete service
1435 logger_cli.info("-> deleting svc {}/{}".format(ns, name))
1436 self.kube.CoreV1.delete_namespaced_service(name, ns)
1437 # TODO: Check if successful
1438 elif res_type == "pod":
1439 # Delete a pod
1440 logger_cli.info("-> deleting pod {}/{}".format(ns, name))
1441 self.kube.CoreV1.delete_namespaced_pod(name, ns)
1442 if wait:
1443 self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
1444 elif res_type == "pvc":
1445 logger_cli.info("-> deleting pvc {}/{}".format(ns, name))
1446 self.kube.CoreV1.delete_namespaced_persistent_volume_claim(
1447 name,
1448 ns
1449 )
1450 if wait:
1451 self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
1452 elif res_type == "pv":
1453 logger_cli.info("-> deleting pv {}/{}".format(ns, name))
1454 self.kube.CoreV1.delete_persistent_volume(name)
1455 if wait:
1456 self.kube.wait_for_phase(res_type, name, None, ["Terminated"])
1457
1458 return True
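
# Example calls (names are illustrative; namespace defaults to qa-space),
# e.g. tearing down a benchmark agent pod and its claim:
#
#   self.cleanup_resource_by_name("pod", "cfgagent-01", wait=True)
#   self.cleanup_resource_by_name("pvc", "cfgagent-pvc-01")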
Alexbfa947c2021-11-11 18:14:28 -06001459
1460 def get_resource_phase_by_name(self, typ, name, ns="qa-space"):
1461 if typ == "pod":
1462 _t = self.kube.get_pod_by_name_and_ns(name, ns)
1463 elif typ == "svc":
1464 _t = self.kube.get_svc_by_name_and_ns(name, ns)
1465 elif typ == "pvc":
1466 _t = self.kube.get_pvc_by_name_and_ns(name, ns)
1467 elif typ == "pv":
1468 _t = self.kube.get_pv_by_name(name)
1469 else:
1470 logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
1471 return None
1472
1473 if _t:
1474 return _t.status.phase
1475 else:
1476 return None
Alexb2129542021-11-23 15:49:42 -06001477
1478 def list_resource_names_by_type_and_ns(self, typ, ns="qa-space"):
1479 if typ == "pod":
1480 _items = self.kube.list_pods(ns)
1481 elif typ == "svc":
1482 _items = self.kube.list_svc(ns)
1483 elif typ == "pvc":
1484 _items = self.kube.list_pvc(ns)
1485 elif typ == "pv":
1486 _items = self.kube.list_pv()
1487 else:
1488 logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
1489 return None
1490 return [[i.metadata.namespace, i.metadata.name] for i in _items.items]
Alex0989ecf2022-03-29 13:43:21 -05001491
Alex0bcf31b2022-03-29 17:38:58 -05001492 def list_pod_names_with_containers(self, ns="qa-space", running_only=True):
1493 _result = []
1494 _pods = self.kube.list_pods(ns)
1495 if not running_only:
1496 for i in _pods.items:
1497 _result.append([
1498 i.metadata.namespace,
1499 i.metadata.name,
1500 [c.name for c in i.spec.containers]
1501 ])
1502 else:
1503 for i in _pods.items:
1504 if i.status.phase == "Running":
1505 _result.append([
1506 i.metadata.namespace,
1507 i.metadata.name,
1508 [c.name for c in i.status.container_statuses
1509 if c.state.running is not None]
1510 ])
1511 return _result
1512
1513 def get_logs_for_pod(self, podname, container, namespace, tail_lines):
1514 try:
1515 return self.kube.get_pod_logs(
1516 podname,
1517 container,
1518 namespace,
1519 tail_lines=tail_lines
1520 )
1521 except KubeException as e:
1522 logger_cli.warning(
1523 "WARNING: Log retrieval failed: '{}'".format(e.message)
1524 )
1525 return ""
1526
1527 def list_namespaces(self):
1528 return [i.metadata.name for i in self.kube.list_namespaces().items]