# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
# Copyright 2019-2022 Mirantis, Inc.
import json
import os
import yaml
from copy import deepcopy
from multiprocessing.dummy import Pool
from time import sleep

from cfg_checker.clients import get_salt_remote, get_kube_remote
from cfg_checker.common.const import all_salt_roles_map, all_kube_roles_map
from cfg_checker.common.const import NODE_UP, NODE_DOWN, NODE_SKIP
from cfg_checker.common.const import ubuntu_versions, nova_openstack_versions
from cfg_checker.common import logger, logger_cli
from cfg_checker.common import utils
from cfg_checker.common.file_utils import create_temp_file_with_content
from cfg_checker.common.exception import SaltException, KubeException
from cfg_checker.common.ssh_utils import PortForward, SshShell
from cfg_checker.common.settings import pkg_dir, ENV_TYPE_KUBE, ENV_TYPE_SALT
from cfg_checker.helpers.console_utils import Progress


node_tmpl = {
    'role': '',
    'node_group': '',
    'status': NODE_DOWN,
    'pillars': {},
    'grains': {},
    'raw': {}
}


def _prepare_skipped_nodes(_names, skip_list, skip_list_file):
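    """Build a list of node names to skip.

    Combines valid entries from a skip list file (if given) with entries
    from the passed skip list, expanding '*' wildcard masks against the
    known node names and dropping duplicates.

    :return: list of node names to be skipped
    """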
    _skipped_minions = []
    # skip list file
    if skip_list_file:
        _valid, _invalid = utils.get_nodes_list(skip_list_file)
        _skipped_minions.extend(_valid)
        if len(_invalid) > 0:
            logger_cli.info(
                "\n# WARNING: Detected invalid entries "
                "in nodes skip list:\n{}\n".format(
                    "\n".join(_invalid)
                )
            )

    # process wildcard, create node list out of mask
    if skip_list:
        _list = []
        _invalid = []
        for _item in skip_list:
            if '*' in _item:
                _str = _item[:_item.index('*')]
                _nodes = [_m for _m in _names if _m.startswith(_str)]
                if not _nodes:
                    logger_cli.warn(
                        "# WARNING: No nodes found for {}".format(_item)
                    )
                _list.extend(_nodes)
            else:
                if _item in _names:
                    _list.append(_item)
                else:
                    logger_cli.warn(
                        "# WARNING: No node found for {}".format(_item)
                    )
        # removing duplicates
        _list = list(set(_list))
        _skipped_minions.extend(_list)

    return _skipped_minions


class Nodes(object):
    def __init__(self, config):
        self.nodes = None
        self.env_config = config

    def skip_node(self, node):
        # Add node to the skip list,
        # for example, if it fails to comply with the rules

        # check if we know such node
        if node in self.nodes.keys() and node not in self.skip_list:
            # yes, add it
            self.skip_list.append(node)
            return True
        else:
            return False

    def get_nodes(self, skip_list=None, skip_list_file=None):
        if not self.nodes:
            if not skip_list and self.env_config.skip_nodes:
                self.gather_node_info(
                    self.env_config.skip_nodes,
                    skip_list_file
                )
            else:
                self.gather_node_info(skip_list, skip_list_file)
        return self.nodes

    def get_info(self):
        _info = {
            'mcp_release': self.mcp_release,
            'openstack_release': self.openstack_release
        }
        return _info

    def is_node_available(self, node, log=True):
        if node in self.skip_list:
            if log:
                logger_cli.info("-> node '{}' not active".format(node))
            return False
        elif node in self.not_responded:
            if log:
                logger_cli.info("-> node '{}' not responded".format(node))
            return False
        else:
            return True


class SaltNodes(Nodes):
    def __init__(self, config):
        super(SaltNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple salt rest client
        self.salt = None
        self.env_type = ENV_TYPE_SALT

    def gather_node_info(self, skip_list, skip_list_file):
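        """Collect node names, statuses and basic metadata via salt.

        Fills self.nodes with a per-node dict (role, group, status),
        marks skipped/unreachable nodes, detects the master node and
        preloads release versions and linux codename/arch pillars.
        """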
        # Keys for all nodes
        # this is not working in scope of 2016.8.3, will override with list
        logger_cli.debug("... collecting node names existing in the cloud")
        if not self.salt:
            self.salt = get_salt_remote(self.env_config)

        try:
            _keys = self.salt.list_keys()
            _str = []
            for _k, _v in _keys.items():
                _str.append("{}: {}".format(_k, len(_v)))
            logger_cli.info("-> keys collected: {}".format(", ".join(_str)))

            self.node_keys = {
                'minions': _keys['minions']
            }
        except Exception:
            _keys = None
            self.node_keys = None

        # List of minions with grains
        _minions = self.salt.list_minions()
        if _minions:
            logger_cli.info(
                "-> api reported {} active minions".format(len(_minions))
            )
        elif not self.node_keys:
            # this is the last resort
            _minions = self.env_config.load_nodes_list()
            logger_cli.info(
                "-> {} nodes loaded from list file".format(len(_minions))
            )
        else:
            _minions = self.node_keys['minions']

        # Skip nodes if needed
        _skipped_minions = \
            _prepare_skipped_nodes(_minions, skip_list, skip_list_file)

        # in case the API did not list minions, we need all that answer ping
        _active = self.salt.get_active_nodes()
        logger_cli.info("-> nodes responded: {}".format(len(_active)))
        # iterate through all accepted nodes and create a dict for it
        self.nodes = {}
        self.skip_list = []
        _domains = set()
        for _name in _minions:
            _nc = utils.get_node_code(_name)
            _rmap = all_salt_roles_map
            _role = _rmap[_nc] if _nc in _rmap else 'unknown'
            if _name in _skipped_minions:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in _active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' is down, "
                        "added to skip list".format(
                            _name
                        )
                    )
            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name]['shortname'] = _name.split(".", 1)[0]
            _domains.add(_name.split(".", 1)[1])
            self.nodes[_name]['node_group'] = _nc
            self.nodes[_name]['role'] = _role
            self.nodes[_name]['status'] = _status
        _domains = list(_domains)
        if len(_domains) > 1:
            logger_cli.warning(
                "Multiple domains detected: {}".format(",".join(_domains))
            )
        # TODO: Use domain with biggest node count by default
        # or force it via config option
        else:
            self.domain = _domains[0]
        logger_cli.info("-> {} nodes inactive".format(len(self.skip_list)))
        logger_cli.info("-> {} nodes collected".format(len(self.nodes)))

        # form an all nodes compound string to use in salt
        self.active_nodes_compound = self.salt.compound_string_from_list(
            filter(
                lambda nd: self.nodes[nd]['status'] == NODE_UP,
                self.nodes
            )
        )
        # get master node fqdn
        # _filtered = filter(
        #     lambda nd: self.nodes[nd]['role'] == const.all_roles_map['cfg'],
        #     self.nodes
        # )
        _role = all_salt_roles_map['cfg']
        _filtered = [n for n, v in self.nodes.items() if v['role'] == _role]
        if len(_filtered) < 1:
            raise SaltException(
                "No master node detected! Check/Update node role map."
            )
        else:
            self.salt.master_node = _filtered[0]

        # OpenStack versions
        self.mcp_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:apt_mk_version"
        )[self.salt.master_node]
        self.openstack_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:openstack_version"
        )[self.salt.master_node]
        # Preload codenames
        # do additional queries to get linux codename and arch for each node
        self.get_specific_pillar_for_nodes("_param:linux_system_codename")
        self.get_specific_pillar_for_nodes("_param:linux_system_architecture")
        for _name in self.nodes.keys():
            _n = self.nodes[_name]
            if _name not in self.skip_list:
                _p = _n['pillars']['_param']
                _n['linux_codename'] = _p['linux_system_codename']
                _n['linux_arch'] = _p['linux_system_architecture']

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs cmd.run and parses the result into place
        or into the dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        _result = self.execute_cmd_on_active_nodes(cmd, nodes=nodes)
        for node, data in _nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

    def get_specific_pillar_for_nodes(self, pillar_path):
        """Function gets pillars on the given path for all nodes

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting node pillars for '{}'".format(pillar_path)
        )
        _result = self.salt.pillar_get(self.active_nodes_compound, pillar_path)
        self.not_responded = []
        for node, data in self.nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        pillar_path
                    )
                )
                continue
            _pillar_keys = pillar_path.split(':')
            _data = data['pillars']
            # pre-create nested dict
            for idx in range(0, len(_pillar_keys)-1):
                _key = _pillar_keys[idx]
                if _key not in _data:
                    _data[_key] = {}
                _data = _data[_key]
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                _data[_pillar_keys[-1]] = None
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                _data[_pillar_keys[-1]] = None
                self.not_responded.append(node)
            else:
                _data[_pillar_keys[-1]] = _result[node]

    def prepare_json_on_node(self, node, _dict, filename):
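        """Dump a dict as JSON to the master's file cache and sync it
        to the given node.

        :return: target path of the file on the node
        """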
        if node in self.skip_list:
            logger_cli.debug(
                "... '{}' skipped while preparing json file of '{}'".format(
                    node,
                    filename
                )
            )

        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading data as '{}' "
            "to master's file cache folder: '{}'".format(
                filename,
                _storage_path
            )
        )
        _cache_path = os.path.join(_storage_path, filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _dumps)
        logger.debug("... syncing file to '{}'".format(node))
        self.salt.get_file(
            node,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        return _target_path

    def prepare_script_on_active_nodes(self, script_filename):
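        """Upload a script from the local 'scripts' folder to the
        master's file cache and sync it to all active nodes.

        :return: target path of the script on the nodes
        """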
        # Prepare script
        _p = os.path.join(pkg_dir, 'scripts', script_filename)
        with open(_p, 'rt') as fd:
            _script = fd.read().splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading script {} "
            "to master's file cache folder: '{}'".format(
                script_filename,
                _storage_path
            )
        )
        self.salt.mkdir(self.salt.master_node, _storage_path)
        # Form cache, source and target path
        _cache_path = os.path.join(_storage_path, script_filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            script_filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _script)
        # command salt to copy file to minions
        logger_cli.debug(
            "... creating script target folder '{}'".format(
                _cache_path
            )
        )
        self.salt.mkdir(
            self.active_nodes_compound,
            os.path.join(
                '/root',
                self.env_config.salt_scripts_folder
            ),
            tgt_type="compound"
        )
        logger.debug("... syncing file to nodes")
        self.salt.get_file(
            self.active_nodes_compound,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        # return path on nodes, just in case
        return _target_path

    def execute_script_on_node(self, node, script_filename, args=[]):
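        """Run a previously synced script on a single node via cmd.run.

        :return: dict of per-node output; empty values mean no response
        """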
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            node,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all false returns mean that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_script_on_active_nodes(self, script_filename, args=None):
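        """Run a previously synced script on all active nodes via cmd.run.

        :return: dict of per-node output; empty values mean no response
        """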
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script")
        # handle results for each node
        _script_arguments = args if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            self.active_nodes_compound,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all false returns mean that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
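        """Run an arbitrary shell command on the active nodes (or the
        given compound target) and track nodes that did not respond.
        """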
        # execute cmd
        self.not_responded = []
        _r = self.salt.cmd(
            nodes if nodes else self.active_nodes_compound,
            'cmd.run',
            param=cmd,
            expr_form="compound"
        )

        # all false returns mean that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r


class KubeNodes(Nodes):
    def __init__(self, config):
        super(KubeNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple kube rest client
        self.kube = get_kube_remote(self.env_config)
        self.env_type = ENV_TYPE_KUBE
        self._namespace = "qa-space"
        self._configmap_name = self.env_config.kube_scripts_folder

        # prepare needed resources
        self._check_namespace()
        self._scripts = self._check_config_map()
        self.prepared_daemonsets = []

    def _check_namespace(self):
        # ensure namespace
        logger_cli.debug(
            "... checking namespace '{}'".format(self._namespace)
        )
        if not self.kube.ensure_namespace(self._namespace):
            raise KubeException(
                "Failed to manage namespace '{}'".format(self._namespace)
            )

    def _check_config_map(self):
        # ensure config map exists
        logger_cli.debug(
            "... checking config map '{}'".format(self._configmap_name)
        )
        _source = os.path.join(pkg_dir, 'scripts')
        return self.kube.create_config_map(
            self._namespace,
            self._configmap_name,
            _source
        )

    def gather_node_info(self, skip_list, skip_list_file):
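        """Collect node names, statuses and metadata from the Kube API.

        Fills self.nodes with per-node labels, roles, addresses and
        linux image/codename info, marks skipped/not-ready nodes and
        detects the k8s-master node.
        """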
        # Gather nodes info and query pod lists for each node
        logger_cli.debug("... collecting node names existing in the cloud")

        # Gather node names and info
        _nodes = self.kube.get_node_info()
        _node_names = list(_nodes.keys())
        # Skip nodes if needed
        _skipped_nodes = \
            _prepare_skipped_nodes(_node_names, skip_list, skip_list_file)

        # Count how many nodes are active
        self._active = [n for n, v in _nodes.items()
                        if v['conditions']['ready']['status']]

        # iterate through all accepted nodes and create a dict for it
        self.nodes = {}
        self.skip_list = []
        for _name in _node_names:
            if _name in _skipped_nodes:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in self._active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' shows 'Ready' as 'False', "
                        "added to skip list".format(
                            _name
                        )
                    )
            _roles = {}
            _labels = {}
            for _label, _value in _nodes[_name]['labels'].items():
                if _label in all_kube_roles_map:
                    _roles[all_kube_roles_map[_label]] = _value
                else:
                    _labels[_label] = _value

            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name].pop("grains")
            self.nodes[_name].pop("pillars")

            # hostname
            self.nodes[_name]['shortname'] = \
                _nodes[_name]['addresses']['hostname']['address']
            # internal
            self.nodes[_name]['internalip'] = \
                _nodes[_name]['addresses']['internalip']['address']
            # alternate
            if self.env_config.force_node_network is not None:
                iIP = self.nodes[_name]['internalip']
                # use last number
                aIP = self.env_config.force_node_network + iIP.split('.')[-1]
                self.nodes[_name]["altip"] = aIP
            self.nodes[_name]['node_group'] = None
            self.nodes[_name]['labels'] = _labels
            self.nodes[_name]['roles'] = _roles
            self.nodes[_name]['status'] = _status
            # Backward compatibility
            _info = _nodes[_name]['status']['node_info']
            self.nodes[_name]['linux_image'] = _info['os_image']
            self.nodes[_name]['linux_arch'] = _info['architecture']

            _codename = "unknown"
            _n, _v, _c = _info['os_image'].split()
            if _n.lower() == 'ubuntu':
                _v, _, _ = _v.rpartition('.') if '.' in _v else (_v, "", "")
                if _v in ubuntu_versions:
                    _codename = ubuntu_versions[_v].split()[0].lower()
            self.nodes[_name]['linux_codename'] = _codename

            # Consider per-data type transfer
            self.nodes[_name]["raw"] = _nodes[_name]
        # TODO: Investigate how to handle domains in Kube, probably - skip
        # _domains = list(_domains)
        # if len(_domains) > 1:
        #     logger_cli.warning(
        #         "Multiple domains detected: {}".format(",".join(_domains))
        #     )
        # else:
        self.domain = "no.domain.in.kube.yet"
        logger_cli.info(
            "-> {} nodes collected: {} - active, {} - not active".format(
                len(self.nodes),
                len(self._active),
                len(self.skip_list)
            )
        )

        _role = "k8s-master"
        _filtered = [n for n, v in self.nodes.items() if _role in v['roles']]
        if len(_filtered) < 1:
            raise KubeException(
                "No k8s-master nodes detected! Check/Update node role map."
            )
        else:
            _r = [n for n, v in self.nodes.items()
                  if v['status'] != NODE_UP and _role in v['roles']]
            if len(_r) > 0:
                logger_cli.warn(
                    "Master nodes are reporting 'NotReady':\n{}".format(
                        "\n".join(_r)
                    )
                )
            self.kube.master_node = _filtered[0]

        # get specific data upfront
        # OpenStack versions
        self.mcp_release = ""
        # Quick and Dirty way to detect OS release
        try:
            _nova_version = self.kube.exec_on_target_pod(
                "nova-manage --version",
                "nova-api-osapi",
                "openstack"
            )
            _nmajor = _nova_version.partition('.')[0]
            self.openstack_release = nova_openstack_versions[_nmajor]
        except KubeException as e:
            logger_cli.warn("Openstack not detected: {}".format(e.message))
            self.openstack_release = nova_openstack_versions["00"]

        return

    @staticmethod
    def _get_ssh_shell(_h, _u, _k, _p, _q, _pipe, timeout=15):
        _ssh = SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe,
            timeout=timeout
        )
        return _ssh.connect()

    @staticmethod
    def _do_ssh_cmd(_cmd, _h, _u, _k, _p, _q, _pipe, timeout=None):
        with SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe
        ) as ssh:
            if timeout is None:
                _r = ssh.do(_cmd)
            else:
                _r = ssh.do(_cmd, timeout=timeout)
            logger_cli.debug("'{}'".format(_r))
            return _r

    def node_shell(
            self,
            node,
            silent=True,
            piped=True,
            use_sudo=True,
            fport=None
    ):
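        """Open an SSH shell to a node, either directly or through a
        port-forward via the configured ssh host.

        :return: [port_forward_or_None, connected_shell]
        """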
        _u = self.env_config.kube_node_user
        _k = self.env_config.kube_node_keypath

        _n = self.nodes[node]
        _h = _n['altip'] if "altip" in _n else _n['internalip']
        _p = 22
        if self.kube.is_local or self.kube.config.ssh_direct:
            logger.debug("Getting shell with no port forward")
            return [None, self._get_ssh_shell(
                _h, _u, _k, _p, silent, piped,
                timeout=self.kube.config.ssh_connect_timeout
            )]
        else:
            logger.debug("Getting shell with port forward")
            _fh = "localhost"
            _p = 10022 if not fport else fport
            _pfwd = PortForward(
                self.env_config.ssh_host,
                _h,
                user=_u,
                keypath=self.env_config.ssh_key,
                loc_port=_p,
                timeout=self.kube.config.ssh_connect_timeout
            )
            _pfwd.connect()
            _ssh = self._get_ssh_shell(
                _fh,
                _u,
                _k,
                _p,
                silent,
                piped,
                timeout=self.kube.config.ssh_connect_timeout
            )
            return [_pfwd, _ssh]

    def execute_script_on_node(self, node, script_filename, args=[]):
        # Prepare path
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        # get result
        _nr = self.node_shell(
            node,
            "python {} {}".format(
                _target_path,
                _script_arguments
            )
        )

        if not _nr:
            self.not_responded.append(node)
            return {}
        else:
            return {node: _nr}

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        logger_cli.debug("... running '{}' on active nodes".format(cmd))
        # handle results for each node
        self.not_responded = []
        _r = {}
        # TODO: Use threading and pool
        for node in self._active:
            _nr = self.node_shell(
                node,
                cmd
            )

            if not _nr:
                self.not_responded.append(node)
            else:
                _r[node] = _nr

        return _r

    def _ssh_exec_script(self, params):
        """
        Threadsafe method to get a shell to a node,
        check/copy the script and get the results
        [
            node_name,
            src_path,
            tgt_path,
            conf,
            args
        ]
        """
        _timeout = self.kube.config.script_execution_timeout
        _name = params[0]
        _src = params[1]
        _tgt = params[2]
        _conf = params[3]
        _args = params[4]
        _port = params[5]
        _log_name = "[" + _name + "]:"
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            _name,
            use_sudo=False,
            fport=_port
        )
        # check python3
        _python = _sh.do("which python3")
        _python = utils.to_bool(
            _sh.do(_check.format(_python))
        )
        if not _python:
            _sh.do("apt install python3", sudo=True, timeout=_timeout)
        # check if script is already there
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            _conf.kube_scripts_folder
        )
        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger.info("{} Syncing file".format(_log_name))
        _code, _r, _e = _sh.scp(
            _src,
            _sh.get_host_path(_tgt),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "{} Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_log_name, _r, _e)
            )

        # execute script
        logger.debug("{} Running script".format(_log_name))
        _out = _sh.do(
            "python3 {}{}".format(
                _tgt,
                _args
            ),
            sudo=True,
            timeout=_timeout
        )

        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()

        return [_name, _out]

    def execute_script_on_active_nodes(self, script_filename, args=None):
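        """Copy and run a script on all active nodes over SSH using a
        worker pool; one forwarded local port per node starting at 10022.

        :return: dict of per-node script output
        """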
        # Prepare script
        _source_path = os.path.join(pkg_dir, 'scripts', script_filename)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        if _script_arguments:
            _script_arguments = " " + _script_arguments
        self.not_responded = []
        _results = {}
        logger_cli.debug(
            "... running '{}' on active nodes, {} worker threads".format(
                script_filename,
                self.env_config.threads
            )
        )
        # Workers pool
        pool = Pool(self.env_config.threads)

        # init the parameters
        # node_name,
        # src_path,
        # tgt_path,
        # conf,
        # args
        _params = []
        _port = 10022
        for node in self._active:
            # build parameter blocks
            _p_list = [
                node,
                _source_path,
                _target_path,
                self.env_config,
                _script_arguments,
                _port
            ]
            _params.append(_p_list)
            _port += 1

        _progress = Progress(len(_params))
        results = pool.imap_unordered(self._ssh_exec_script, _params)

        for ii in enumerate(results, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results[ii[1][0]] = ii[1][1]
            _progress.write_progress(ii[0])

        _progress.end()
        pool.close()
        pool.join()

        # return collected results
        return _results

    def prepare_json_on_node(self, node, _dict, filename):
        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()

        _source_path = create_temp_file_with_content(_dumps)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            filename
        )
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder
        )
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            node,
            use_sudo=False
        )

        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger_cli.debug(
            "... create data on node '{}':'{}'".format(node, _target_path)
        )
        _code, _r, _e = _sh.scp(
            _source_path,
            _sh.get_host_path(_target_path),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_r, _e)
            )

        _fwd_sh.kill()
        _sh.kill()
        return _target_path

    def prepare_daemonset(self, template_filename):
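        """Load a daemonset template, mount the scripts configmap into
        its pod spec and create the daemonset in the qa namespace.
        """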
        # load template
        _yaml_file = os.path.join(pkg_dir, 'templates', template_filename)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _ds = {}
        with open(_yaml_file) as dsFile:
            _ds = yaml.load(dsFile, Loader=yaml.SafeLoader)

        # Add scripts to pod template as volumeMounts
        _tspec = _ds['spec']['template']['spec']
        _tspec['containers'][0]['volumeMounts'] = [
            {
                "name": "scripts",
                "mountPath": os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder
                )
            }
        ]

        _tspec['volumes'] = [
            {
                "name": "scripts",
                "configMap": {
                    "name": self._configmap_name
                }
            }
        ]

        # create daemonset
        logger_cli.debug("... preparing daemonset")
        _ds = self.kube.prepare_daemonset_from_yaml(self._namespace, _ds)
        # Save prepared daemonset
        self.prepared_daemonsets.append(_ds)
        # return it
        return _ds

    def wait_for_daemonset(self, ds, timeout=120):
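        """Poll daemonset status until all non-skipped nodes report
        ready and up-to-date pods, or until the timeout expires.

        :return: True if the daemonset became ready, False on timeout
        """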
        # iteration timeout
        _sleep_time = 5
        _timeout = timeout

        # query daemonset and check that desired=scheduled=ready
        _ds = self.kube.get_daemon_set_by_name(
            ds.metadata.namespace,
            ds.metadata.name
        )

        _total = len(self.nodes)
        # _scheduled = _ds.status.scheduled
        # _ready = _ds.status.ready

        # Init Progress bar to show daemonset readiness
        _progress = Progress(_total)
        while _timeout > 0:
            # get new status
            _ds = self.kube.get_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
            _desired = _ds.status.desired_number_scheduled
            _scheduled = _ds.status.current_number_scheduled
            _ready = _ds.status.number_ready
            _updated = _ds.status.updated_number_scheduled
            # print it
            _progress.write_progress(
                _ready,
                note="desired: {}, scheduled: {}, ready: {},"
                     " up-to-date: {}".format(
                         _desired,
                         _scheduled,
                         _ready,
                         _updated
                     )
            )

            # check values and return
            # In case of Update, also checking _updated value
            if _ready == _updated and _ready == _total - len(self.skip_list):
                # close progress bar class
                _progress.end()
                logger_cli.debug("... daemonset is ready")
                return True
            # iterate
            _timeout -= _sleep_time
            # wait
            sleep(_sleep_time)

        # timed out
        _progress.end()
        # log it
        logger_cli.error("Timed out waiting for Daemonset to be ready")
        return False

    def exec_script_on_target_pod(self, pod_name, script_filename, args=None):
        """
        Run script from configmap on target pod assuming it is present
        """
        _arguments = args if args else []
        _cmd = [
            "python3",
            os.path.join(
                "/",
                self.env_config.kube_scripts_folder,
                script_filename
            )
        ] + _arguments
        _result = self.kube.exec_on_target_pod(
            _cmd,
            pod_name,
            self._namespace,
            strict=True
        )
        return _result

    def exec_cmd_on_target_pod(self, pod_name, ns, command_str):
        """
        Run a command on target pod assuming it is present
        """
        _result = self.kube.exec_on_target_pod(
            command_str,
            pod_name,
            ns,
            strict=True
        )
        return _result

    def execute_cmd_on_daemon_set(
            self,
            ds,
            cmd,
            _args=None,
            is_script=False
    ):
        """
        Query daemonset for pods and execute cmd/script on all of them
        """
        _results = self.exec_cmd_on_pods(
            self.kube.get_pods_for_daemonset(ds),
            cmd,
            _args=_args,
            is_script=is_script
        )
        # Update results
        _ds_results = {}
        for _n, _, _v in _results:
            _ds_results[_n] = _v
        return _ds_results

    def exec_on_labeled_pods_and_ns(
            self,
            label_str,
            cmd,
            _args=None,
            ns=None,
            silent=False
    ):
        if not ns:
            ns = self._namespace
        _results = self.exec_cmd_on_pods(
            self.kube.list_pods(ns, label_str=label_str),
            cmd,
            _args=_args,
            silent=silent
        )
        _pod_results = {}
        for _, _p, _v in _results:
            _pod_results[_p] = _v
        return _pod_results

    def exec_cmd_on_pods(
            self,
            pod_list,
            cmd,
            _args=None,
            is_script=False,
            silent=False
    ):
        def _kube_exec_on_pod(plist):
            return [
                plist[1],  # node
                plist[3],  # pod name
                plist[0].kube.exec_on_target_pod(  # pointer to function
                    plist[4],  # cmd
                    plist[3],  # pod name
                    plist[2],  # namespace
                    strict=True,
                    _request_timeout=120,
                    arguments=plist[5]
                )
            ]

        # Create map for threads: [[node_name, ns, pod_name, cmd]...]
        logger_cli.debug(
            "... running script on {} pods using {} threads at a time".format(
                len(pod_list.items),
                self.env_config.threads
            )
        )
        _plist = []
        _arguments = _args if _args else ""
        if is_script:
            _cmd = [
                "python3",
                os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder,
                    cmd
                ),
                _arguments
            ]
            _cmd = " ".join(_cmd)
        else:
            # decide if we are to wrap it to bash
            if '|' in cmd:
                _cmd = "bash -c"
                _arguments = cmd
            else:
                _cmd = cmd
        for item in pod_list.items:
            _plist.append(
                [
                    self,
                    item.spec.node_name,
                    item.metadata.namespace,
                    item.metadata.name,
                    _cmd,
                    _arguments
                ]
            )

        # map func and cmd
        pool = Pool(self.env_config.threads)
        _results = []
        self.not_responded = []
        # create result list
        if not silent:
            _progress = Progress(len(_plist))
        ret = pool.imap_unordered(_kube_exec_on_pod, _plist)

        for ii in enumerate(ret, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results.append(ii[1])
            if not silent:
                _progress.write_progress(ii[0])

        if not silent:
            _progress.end()
        pool.close()
        pool.join()
        logger_cli.debug(
            "... done, {} total outputs; {} not responded".format(
                len(_results),
                len(self.not_responded)
            )
        )
        return _results

    def delete_daemonset(self, ds):
        # Try to delete daemonset
        try:
            _r = self.kube.delete_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
        except Exception as e:
            logger_cli.warning("Failed to delete daemonset '{}': {}".format(
                ds.metadata.name,
                e.reason
            ))
            _r = None
        return _r

    def get_pod_name_in_daemonset_by_node(self, nodename, daemonset):
        _podname = None
        _pods = self.kube.get_pods_for_daemonset(daemonset)
        for item in _pods.items:
            if item.spec.node_name == nodename:
                _podname = item.metadata.name

        return _podname

    def prepare_json_in_pod(self, podname, namespace, targets, filename):
        # Iterate pods in daemonset and prepare json file on each one
        _target_path = os.path.join(
            "/",
            "tmp",
            filename
        )
        # checking the folder will probably not be needed as the daemonset
        # links the configmap there on creation
        # _folder = os.path.join(
        #     self.env_config.kube_node_homepath,
        #     self.env_config.kube_scripts_folder
        # )
        # prepare data
        buffer = json.dumps(targets, indent=2).encode('utf-8')

        # write data to pod using fancy websocket function
        self.kube.put_string_buffer_to_pod_as_textfile(
            podname,
            namespace,
            buffer,
            _target_path
        )

        # TODO: Exception handling

        return _target_path

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs command on daemonset and parses the result into place
        or into the dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        # Dirty way to get daemonset that was used in checker and not deleted
        _ds = self.prepared_daemonsets[0]
        _result = self.execute_cmd_on_daemon_set(_ds, cmd)
        for node, data in _nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

    def prepare_benchmark_agent(self, index, path, sc, size, template):
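        """Render a benchmark agent pod from a template: set unique
        pod/PVC names, mount the target path and create the PVC and pod.

        :return: (pod, pvc) prepared objects
        """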
        # Load pod template
        _yaml_file = os.path.join(pkg_dir, 'templates', template)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _pod = {}
        with open(_yaml_file) as podFile:
            _pod = yaml.load(podFile, Loader=yaml.SafeLoader)

        # set namings
        _n = "cfgagent-{:02}".format(index)
        _pvc_n = "cfgagent-pvc-{:02}".format(index)
        # _pv_n = "cfgagent-pv-{:02}".format(index)

        _pod["metadata"]["name"] = _n
        _pod["metadata"]["labels"]["name"] = _n
        # replace volumeMounts
        for _c in _pod["spec"]["containers"]:
            for _mnt in _c["volumeMounts"]:
                if "placeholder" in _mnt["name"]:
                    # _mnt["name"] = _pv_n
                    _mnt["mountPath"] = path
        # replace claim
        for _v in _pod["spec"]["volumes"]:
            if "cfgagent-pv" in _v["name"]:
                # _v["name"] = _pv_n
                _v["persistentVolumeClaim"]["claimName"] = _pvc_n

        # init volume resources
        # _pv_object = self.kube.init_pv_resource(_pv_n, sc, size, path)
        # _pv = self.kube.prepare_pv(_pv_object)
        # update size of the volume to be 15% larger
        _pvc_object = self.kube.init_pvc_resource(_pvc_n, sc, size)
        _pvc = self.kube.prepare_pvc(_pvc_object)

        # start pod
        _pod = self.kube.prepare_pod_from_yaml(_pod)

        # return _pod, _pv, _pvc
        return _pod, _pvc

    def expose_benchmark_agent(self, agent):
        return self.kube.expose_pod_port(agent, 8765)

    def cleanup_resource_by_name(self, res_type, name, ns=None, wait=False):
        """Cleans up a resource using string res_type and the ns/name

        Args:
            res_type (string): resource type name: pod, pv, pvc, svc
            name (string): resource name to cleanup
            ns (string, optional): Namespace to use. Default is 'qa-space'

        return: (Bool) Is Success?
        """
        # fill defaults
        if not ns:
            ns = self._namespace
        # Handle res_type errors and choose resource type
        if not res_type:
            logger_cli.debug(
                "... resource type invalid: '{}'".format(res_type)
            )
            return False
        elif not name:
            logger_cli.debug("... resource name invalid: '{}'".format(name))
            return False
        elif res_type == "svc":
            # Delete service
            logger_cli.info("-> deleting svc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_service(name, ns)
            # TODO: Check if successful
        elif res_type == "pod":
            # Delete a pod
            logger_cli.info("-> deleting pod {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_pod(name, ns)
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pvc":
            logger_cli.info("-> deleting pvc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_persistent_volume_claim(
                name,
                ns
            )
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pv":
            logger_cli.info("-> deleting pv {}/{}".format(ns, name))
            self.kube.CoreV1.delete_persistent_volume(name)
            if wait:
                self.kube.wait_for_phase(res_type, name, None, ["Terminated"])

        return True

    def get_resource_phase_by_name(self, typ, name, ns="qa-space"):
        if typ == "pod":
            _t = self.kube.get_pod_by_name_and_ns(name, ns)
        elif typ == "svc":
            _t = self.kube.get_svc_by_name_and_ns(name, ns)
        elif typ == "pvc":
            _t = self.kube.get_pvc_by_name_and_ns(name, ns)
        elif typ == "pv":
            _t = self.kube.get_pv_by_name(name)
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None

        if _t:
            return _t.status.phase
        else:
            return None

    def list_resource_names_by_type_and_ns(self, typ, ns="qa-space"):
        if typ == "pod":
            _items = self.kube.list_pods(ns)
        elif typ == "svc":
            _items = self.kube.list_svc(ns)
        elif typ == "pvc":
            _items = self.kube.list_pvc(ns)
        elif typ == "pv":
            _items = self.kube.list_pv()
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None
        return [[i.metadata.namespace, i.metadata.name] for i in _items.items]

    def get_logs_for_pod(self, podname, namespace):
        return self.kube.get_pod_logs(podname, namespace)
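

# A minimal usage sketch (hypothetical `config` object and `env_type` value;
# both classes expose the same Nodes interface, so callers can pick one
# based on the detected environment type):
#
#     from cfg_checker.common.settings import ENV_TYPE_KUBE
#
#     nodes_cls = KubeNodes if env_type == ENV_TYPE_KUBE else SaltNodes
#     mgr = nodes_cls(config)
#     nodes = mgr.get_nodes(skip_list=["cmp0*"])
#     outputs = mgr.execute_cmd_on_active_nodes("uname -r")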