# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
# Copyright 2019-2022 Mirantis, Inc.
import json
import os
import yaml
from copy import deepcopy
from multiprocessing.dummy import Pool
from time import sleep

from cfg_checker.clients import get_salt_remote, get_kube_remote
from cfg_checker.common.const import all_salt_roles_map, all_kube_roles_map
from cfg_checker.common.const import NODE_UP, NODE_DOWN, NODE_SKIP
from cfg_checker.common.const import ubuntu_versions, nova_openstack_versions
from cfg_checker.common import logger, logger_cli
from cfg_checker.common import utils
from cfg_checker.common.file_utils import create_temp_file_with_content
from cfg_checker.common.exception import SaltException, KubeException
from cfg_checker.common.ssh_utils import PortForward, SshShell
from cfg_checker.common.settings import pkg_dir, ENV_TYPE_KUBE, ENV_TYPE_SALT
from cfg_checker.helpers.console_utils import Progress


node_tmpl = {
    'role': '',
    'node_group': '',
    'status': NODE_DOWN,
    'pillars': {},
    'grains': {},
    'raw': {}
}


def _prepare_skipped_nodes(_names, skip_list, skip_list_file):
    _skipped_minions = []
    # skip list file
    if skip_list_file:
        _valid, _invalid = utils.get_nodes_list(skip_list_file)
        _skipped_minions.extend(_valid)
        if len(_invalid) > 0:
            logger_cli.info(
                "\n# WARNING: Detected invalid entries "
                "in nodes skip list:\n{}\n".format(
                    "\n".join(_invalid)
                )
            )

    # process wildcard, create node list out of mask
    if skip_list:
        _list = []
        _invalid = []
        for _item in skip_list:
            if '*' in _item:
                _str = _item[:_item.index('*')]
                _nodes = [_m for _m in _names if _m.startswith(_str)]
                if not _nodes:
                    logger_cli.warn(
                        "# WARNING: No nodes found for {}".format(_item)
                    )
                _list.extend(_nodes)
            else:
                if _item in _names:
                    _list.append(_item)
                else:
                    logger_cli.warn(
                        "# WARNING: No node found for {}".format(_item)
                    )
        # removing duplicates
        _list = list(set(_list))
        _skipped_minions.extend(_list)

    return _skipped_minions
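
# Usage sketch (illustrative; node names and the mask are made-up examples):
# expanding a wildcard mask against known minion names and merging it with an
# optional skip list file:
#
#   _skipped = _prepare_skipped_nodes(
#       ["cmp001.local", "cmp002.local", "ctl01.local"],
#       skip_list=["cmp*"],
#       skip_list_file=None
#   )
#   # -> ["cmp001.local", "cmp002.local"]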


class Nodes(object):
    def __init__(self, config):
        self.nodes = None
        self.env_config = config

    def skip_node(self, node):
        # Add node to skip list
        # For example, if it fails to comply with the rules

        # check if we know such node
        if node in self.nodes.keys() and node not in self.skip_list:
            # yes, add it
            self.skip_list.append(node)
            return True
        else:
            return False

    def get_nodes(self, skip_list=None, skip_list_file=None):
        if not self.nodes:
            if not skip_list and self.env_config.skip_nodes:
                self.gather_node_info(
                    self.env_config.skip_nodes,
                    skip_list_file
                )
            else:
                self.gather_node_info(skip_list, skip_list_file)
        return self.nodes
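
    # Usage sketch (illustrative; `config` is whatever cfg-checker builds from
    # the environment, and the node names are made-up examples):
    #
    #   nodes = SaltNodes(config)       # or KubeNodes(config)
    #   nodes.get_nodes(skip_list=["cmp001.example.local"])
    #   if nodes.is_node_available("ctl01.example.local"):
    #       ...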

    def get_info(self):
        _info = {
            'mcp_release': self.mcp_release,
            'openstack_release': self.openstack_release
        }
        return _info

    def is_node_available(self, node, log=True):
        if node in self.skip_list:
            if log:
                logger_cli.info("-> node '{}' not active".format(node))
            return False
        elif node in self.not_responded:
            if log:
                logger_cli.info("-> node '{}' not responded".format(node))
            return False
        else:
            return True


class SaltNodes(Nodes):
    def __init__(self, config):
        super(SaltNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple salt rest client
        self.salt = None
        self.env_type = ENV_TYPE_SALT

    def gather_node_info(self, skip_list, skip_list_file):
        # Keys for all nodes
        # this is not working in scope of 2016.8.3, will override with list
        logger_cli.debug("... collecting node names existing in the cloud")
        if not self.salt:
            self.salt = get_salt_remote(self.env_config)

        try:
            _keys = self.salt.list_keys()
            _str = []
            for _k, _v in _keys.items():
                _str.append("{}: {}".format(_k, len(_v)))
            logger_cli.info("-> keys collected: {}".format(", ".join(_str)))

            self.node_keys = {
                'minions': _keys['minions']
            }
        except Exception:
            _keys = None
            self.node_keys = None

        # List of minions with grains
        _minions = self.salt.list_minions()
        if _minions:
            logger_cli.info(
                "-> api reported {} active minions".format(len(_minions))
            )
        elif not self.node_keys:
            # this is the last resort
            _minions = self.env_config.load_nodes_list()
            logger_cli.info(
                "-> {} nodes loaded from list file".format(len(_minions))
            )
        else:
            _minions = self.node_keys['minions']

        # Skip nodes if needed
        _skipped_minions = \
            _prepare_skipped_nodes(_minions, skip_list, skip_list_file)

        # in case API not listed minions, we need all that answer ping
        _active = self.salt.get_active_nodes()
        logger_cli.info("-> nodes responded: {}".format(len(_active)))
        # iterate through all accepted nodes and create a dict for it
        self.nodes = {}
        self.skip_list = []
        _domains = set()
        for _name in _minions:
            _nc = utils.get_node_code(_name)
            _rmap = all_salt_roles_map
            _role = _rmap[_nc] if _nc in _rmap else 'unknown'
            if _name in _skipped_minions:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in _active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' is down, "
                        "added to skip list".format(
                            _name
                        )
                    )
            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name]['shortname'] = _name.split(".", 1)[0]
            _domains.add(_name.split(".", 1)[1])
            self.nodes[_name]['node_group'] = _nc
            self.nodes[_name]['role'] = _role
            self.nodes[_name]['status'] = _status
        _domains = list(_domains)
        if len(_domains) > 1:
            logger_cli.warning(
                "Multiple domains detected: {}".format(",".join(_domains))
            )
            # TODO: Use domain with biggest node count by default
            # or force it via config option
        else:
            self.domain = _domains[0]
        logger_cli.info("-> {} nodes inactive".format(len(self.skip_list)))
        logger_cli.info("-> {} nodes collected".format(len(self.nodes)))

        # form an all nodes compound string to use in salt
        self.active_nodes_compound = self.salt.compound_string_from_list(
            filter(
                lambda nd: self.nodes[nd]['status'] == NODE_UP,
                self.nodes
            )
        )
        # get master node fqdn
        # _filtered = filter(
        #    lambda nd: self.nodes[nd]['role'] == const.all_roles_map['cfg'],
        #    self.nodes
        # )
        _role = all_salt_roles_map['cfg']
        _filtered = [n for n, v in self.nodes.items() if v['role'] == _role]
        if len(_filtered) < 1:
            raise SaltException(
                "No master node detected! Check/Update node role map."
            )
        else:
            self.salt.master_node = _filtered[0]

        # OpenStack versions
        self.mcp_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:apt_mk_version"
        )[self.salt.master_node]
        self.openstack_release = self.salt.pillar_get(
            self.salt.master_node,
            "_param:openstack_version"
        )[self.salt.master_node]
        # Preload codenames
        # do additional queries to get linux codename and arch for each node
        self.get_specific_pillar_for_nodes("_param:linux_system_codename")
        self.get_specific_pillar_for_nodes("_param:linux_system_architecture")
        for _name in self.nodes.keys():
            _n = self.nodes[_name]
            if _name not in self.skip_list:
                _p = _n['pillars']['_param']
                _n['linux_codename'] = _p['linux_system_codename']
                _n['linux_arch'] = _p['linux_system_architecture']

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs cmd.run and parses the result into place
        or into the dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        _result = self.execute_cmd_on_active_nodes(cmd, nodes=nodes)
        for node, data in _nodes.items():

            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]
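
    # Usage sketch (illustrative; the command and target key are made-up
    # examples): run a shell command on all active nodes and store each node's
    # output under self.nodes[<node>]['kernel']:
    #
    #   self.get_cmd_for_nodes("uname -r", "kernel")
    #   # self.nodes["cmp001.example.local"]["kernel"] -> "4.15.0-112-generic"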

    def get_specific_pillar_for_nodes(self, pillar_path):
        """Function gets pillars on given path for all nodes

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting node pillars for '{}'".format(pillar_path)
        )
        _result = self.salt.pillar_get(self.active_nodes_compound, pillar_path)
        self.not_responded = []
        for node, data in self.nodes.items():
            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        pillar_path
                    )
                )
                continue
            _pillar_keys = pillar_path.split(':')
            _data = data['pillars']
            # pre-create nested dict
            for idx in range(0, len(_pillar_keys)-1):
                _key = _pillar_keys[idx]
                if _key not in _data:
                    _data[_key] = {}
                _data = _data[_key]
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                _data[_pillar_keys[-1]] = None
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                _data[_pillar_keys[-1]] = None
                self.not_responded.append(node)
            else:
                _data[_pillar_keys[-1]] = _result[node]
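
    # Illustrative sketch (not from the original source): a pillar path such as
    # "_param:linux_system_codename" is unfolded into nested keys, so after the
    # call each active node carries something like:
    #
    #   self.nodes[<node>]['pillars'] == {
    #       '_param': {'linux_system_codename': 'bionic'}
    #   }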

    def prepare_json_on_node(self, node, _dict, filename):
        if node in self.skip_list:
            logger_cli.debug(
                "... '{}' skipped while preparing json file of '{}'".format(
                    node,
                    filename
                )
            )

        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading data as '{}' "
            "to master's file cache folder: '{}'".format(
                filename,
                _storage_path
            )
        )
        _cache_path = os.path.join(_storage_path, filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _dumps)
        logger.debug("... syncing file to '{}'".format(node))
        self.salt.get_file(
            node,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        return _target_path

    def prepare_script_on_active_nodes(self, script_filename):
        # Prepare script
        _p = os.path.join(pkg_dir, 'scripts', script_filename)
        with open(_p, 'rt') as fd:
            _script = fd.read().splitlines()
        _storage_path = os.path.join(
            self.env_config.salt_file_root, self.env_config.salt_scripts_folder
        )
        logger_cli.debug(
            "... uploading script {} "
            "to master's file cache folder: '{}'".format(
                script_filename,
                _storage_path
            )
        )
        self.salt.mkdir(self.salt.master_node, _storage_path)
        # Form cache, source and target path
        _cache_path = os.path.join(_storage_path, script_filename)
        _source_path = os.path.join(
            'salt://',
            self.env_config.salt_scripts_folder,
            script_filename
        )
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        logger_cli.debug("... creating file in cache '{}'".format(_cache_path))
        self.salt.f_touch_master(_cache_path)
        self.salt.f_append_master(_cache_path, _script)
        # command salt to copy file to minions
        logger_cli.debug(
            "... creating script target folder '{}'".format(
                _cache_path
            )
        )
        self.salt.mkdir(
            self.active_nodes_compound,
            os.path.join(
                '/root',
                self.env_config.salt_scripts_folder
            ),
            tgt_type="compound"
        )
        logger.debug("... syncing file to nodes")
        self.salt.get_file(
            self.active_nodes_compound,
            _source_path,
            _target_path,
            tgt_type="compound"
        )
        # return path on nodes, just in case
        return _target_path

    def execute_script_on_node(self, node, script_filename, args=[]):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            node,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all false returns means that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare path
        _target_path = os.path.join(
            '/root',
            self.env_config.salt_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script")
        # handle results for each node
        _script_arguments = args if args else ""
        self.not_responded = []
        _r = self.salt.cmd(
            self.active_nodes_compound,
            'cmd.run',
            param='python {} {}'.format(_target_path, _script_arguments),
            expr_form="compound"
        )

        # all false returns means that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r
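
    # Usage sketch (illustrative; the script name is a made-up example):
    # upload a script to all active minions, run it, and inspect which nodes
    # did not answer:
    #
    #   self.prepare_script_on_active_nodes("pkg_versions.py")
    #   _out = self.execute_script_on_active_nodes("pkg_versions.py")
    #   for _silent in self.not_responded:
    #       logger_cli.warn("no answer from {}".format(_silent))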

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute cmd
        self.not_responded = []
        _r = self.salt.cmd(
            nodes if nodes else self.active_nodes_compound,
            'cmd.run',
            param=cmd,
            expr_form="compound"
        )

        # all false returns means that there is no response
        self.not_responded = [_n for _n in _r.keys() if not _r[_n]]
        return _r


class KubeNodes(Nodes):
    def __init__(self, config):
        super(KubeNodes, self).__init__(config)
        logger_cli.info("# Gathering environment information")
        # simple kube rest client
        self.kube = get_kube_remote(self.env_config)
        self.env_type = ENV_TYPE_KUBE
        self._namespace = "qa-space"
        self._configmap_name = self.env_config.kube_scripts_folder

        # prepare needed resources
        self._check_namespace()
        self._scripts = self._check_config_map()
        self.prepared_daemonsets = []

    def _check_namespace(self):
        # ensure namespace
        logger_cli.debug(
            "... checking namespace '{}'".format(self._namespace)
        )
        if not self.kube.ensure_namespace(self._namespace):
            raise KubeException(
                "Failed to manage namespace '{}'".format(self._namespace)
            )

    def _check_config_map(self):
        # ensure config map exists
        logger_cli.debug(
            "... checking config map '{}'".format(self._configmap_name)
        )
        _source = os.path.join(pkg_dir, 'scripts')
        return self.kube.create_config_map(
            self._namespace,
            self._configmap_name,
            _source
        )

    def gather_node_info(self, skip_list, skip_list_file):
        # Gather nodes info and query pod lists for each node
        logger_cli.debug("... collecting node names existing in the cloud")

        # Gather node names and info
        _nodes = self.kube.get_node_info()
        _node_names = list(_nodes.keys())
        # Skip nodes if needed
        _skipped_nodes = \
            _prepare_skipped_nodes(_node_names, skip_list, skip_list_file)

        # Count how many nodes active
        self._active = [n for n, v in _nodes.items()
                        if v['conditions']['ready']['status']]

        # iterate through all accepted nodes and create a dict for it
        self.nodes = {}
        self.skip_list = []
        for _name in _node_names:
            if _name in _skipped_nodes:
                _status = NODE_SKIP
                self.skip_list.append(_name)
            else:
                _status = NODE_UP if _name in self._active else NODE_DOWN
                if _status == NODE_DOWN:
                    self.skip_list.append(_name)
                    logger_cli.info(
                        "-> '{}' shows 'Ready' as 'False', "
                        "added to skip list".format(
                            _name
                        )
                    )
            _roles = {}
            _labels = {}
            for _label, _value in _nodes[_name]['labels'].items():
                if _label in all_kube_roles_map:
                    _roles[all_kube_roles_map[_label]] = _value
                else:
                    _labels[_label] = _value

            self.nodes[_name] = deepcopy(node_tmpl)
            self.nodes[_name].pop("grains")
            self.nodes[_name].pop("pillars")

            # hostname
            self.nodes[_name]['shortname'] = \
                _nodes[_name]['addresses']['hostname']['address']
            self.nodes[_name]['internalip'] = \
                _nodes[_name]['addresses']['internalip']['address']
            self.nodes[_name]['node_group'] = None
            self.nodes[_name]['labels'] = _labels
            self.nodes[_name]['roles'] = _roles
            self.nodes[_name]['status'] = _status
            # Backward compatibility
            _info = _nodes[_name]['status']['node_info']
            self.nodes[_name]['linux_image'] = _info['os_image']
            self.nodes[_name]['linux_arch'] = _info['architecture']

            _codename = "unknown"
            _n, _v, _c = _info['os_image'].split()
            if _n.lower() == 'ubuntu':
                _v, _, _ = _v.rpartition('.') if '.' in _v else (_v, "", "")
                if _v in ubuntu_versions:
                    _codename = ubuntu_versions[_v].split()[0].lower()
            self.nodes[_name]['linux_codename'] = _codename

            # Consider per-data type transfer
            self.nodes[_name]["raw"] = _nodes[_name]
        # TODO: Investigate how to handle domains in Kube, probably - skip
        # _domains = list(_domains)
        # if len(_domains) > 1:
        #     logger_cli.warning(
        #         "Multiple domains detected: {}".format(",".join(_domains))
        #     )
        # else:
        self.domain = "no.domain.in.kube.yet"
        logger_cli.info(
            "-> {} nodes collected: {} - active, {} - not active".format(
                len(self.nodes),
                len(self._active),
                len(self.skip_list)
            )
        )

        _role = "k8s-master"
        _filtered = [n for n, v in self.nodes.items() if _role in v['roles']]
        if len(_filtered) < 1:
            raise KubeException(
                "No k8s-master nodes detected! Check/Update node role map."
            )
        else:
            _r = [n for n, v in self.nodes.items()
                  if v['status'] != NODE_UP and _role in v['roles']]
            if len(_r) > 0:
                logger_cli.warn(
                    "Master nodes are reporting 'NotReady':\n{}".format(
                        "\n".join(_r)
                    )
                )
            self.kube.master_node = _filtered[0]

        # get specific data upfront
        # OpenStack versions
        self.mcp_release = ""
        # Quick and Dirty way to detect OS release
        try:
            _nova_version = self.kube.exec_on_target_pod(
                "nova-manage --version",
                "nova-api-osapi",
                "openstack"
            )
            _nmajor = _nova_version.partition('.')[0]
            self.openstack_release = nova_openstack_versions[_nmajor]
        except KubeException as e:
            logger_cli.warn("Openstack not detected: {}".format(e.message))
            self.openstack_release = nova_openstack_versions["00"]

        return

    @staticmethod
    def _get_ssh_shell(_h, _u, _k, _p, _q, _pipe, timeout=15):
        _ssh = SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe,
            timeout=timeout
        )
        return _ssh.connect()

    @staticmethod
    def _do_ssh_cmd(_cmd, _h, _u, _k, _p, _q, _pipe, timeout=None):
        with SshShell(
            _h,
            user=_u,
            keypath=_k,
            port=_p,
            silent=_q,
            piped=_pipe
        ) as ssh:
            if timeout is None:
                _r = ssh.do(_cmd)
            else:
                _r = ssh.do(_cmd, timeout=timeout)
            logger_cli.debug("'{}'".format(_r))
            return _r
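
    # Usage sketch (illustrative; host, user and key path are made-up
    # examples, the helper itself is the static method defined above):
    #
    #   _out = KubeNodes._do_ssh_cmd(
    #       "uname -a",
    #       "10.0.0.5",
    #       "ubuntu",
    #       "/home/user/.ssh/id_rsa",
    #       22,
    #       True,   # silent
    #       True    # piped
    #   )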

    def node_shell(
            self,
            node,
            silent=True,
            piped=True,
            use_sudo=True,
            fport=None
    ):
        _u = self.env_config.kube_node_user
        _k = self.env_config.kube_node_keypath
        _h = self.nodes[node]['internalip']
        _p = 22
        if self.kube.is_local or self.kube.config.ssh_direct:
            logger.debug("Getting shell with no port forward")
            return [None, self._get_ssh_shell(
                _h, _u, _k, _p, silent, piped,
                timeout=self.kube.config.ssh_connect_timeout
            )]
        else:
            logger.debug("Getting shell with port forward")
            _fh = "localhost"
            _p = 10022 if not fport else fport
            _pfwd = PortForward(
                self.env_config.ssh_host,
                _h,
                user=_u,
                keypath=self.env_config.ssh_key,
                loc_port=_p,
                timeout=self.kube.config.ssh_connect_timeout
            )
            _pfwd.connect()
            _ssh = self._get_ssh_shell(
                _fh,
                _u,
                _k,
                _p,
                silent,
                piped,
                timeout=self.kube.config.ssh_connect_timeout
            )
            return [_pfwd, _ssh]
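
    # Usage sketch (illustrative; node name is a made-up example): node_shell()
    # returns a [port_forward, shell] pair, where port_forward is None when a
    # direct connection is used, so both handles should be closed when done:
    #
    #   _fwd, _sh = self.node_shell("node-01.example.local")
    #   _out = _sh.do("uname -r")
    #   if _fwd:
    #       _fwd.kill()
    #   _sh.kill()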

    def execute_script_on_node(self, node, script_filename, args=[]):
        # Prepare path
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )

        # execute script
        logger_cli.debug("... running script on '{}'".format(node))
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        self.not_responded = []
        # get result
        _nr = self.node_shell(
            node,
            "python {} {}".format(
                _target_path,
                _script_arguments
            )
        )

        if not _nr:
            self.not_responded.append(node)
            return {}
        else:
            return {node: _nr}

    def execute_cmd_on_active_nodes(self, cmd, nodes=None):
        # execute script
        logger_cli.debug("...running '{}' on active nodes".format(cmd))
        # handle results for each node
        self.not_responded = []
        _r = {}
        # TODO: Use threading and pool
        for node in self._active:
            _nr = self.node_shell(
                node,
                cmd
            )

            if not _nr:
                self.not_responded.append(node)
            else:
                _r[node] = _nr

        return _r

    def _ssh_exec_script(self, params):
        """
        Threadsafe method to get shell to node,
        check/copy script and get results
        [
            node_name,
            src_path,
            tgt_path,
            conf,
            args
        ]
        """
        _timeout = self.kube.config.script_execution_timeout
        _name = params[0]
        _src = params[1]
        _tgt = params[2]
        _conf = params[3]
        _args = params[4]
        _port = params[5]
        _log_name = "["+_name+"]:"
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            _name,
            use_sudo=False,
            fport=_port
        )
        # check python3
        _python = _sh.do("which python3")
        _python = utils.to_bool(
            _sh.do(_check.format(_python))
        )
        if not _python:
            _sh.do("apt install python3", sudo=True, timeout=_timeout)
        # check if script already there
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            _conf.kube_scripts_folder
        )
        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger.info("{} Syncing file".format(_log_name))
        _code, _r, _e = _sh.scp(
            _src,
            _sh.get_host_path(_tgt),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "{} Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_log_name, _r, _e)
            )

        # execute script
        logger.debug("{} Running script".format(_log_name))
        _out = _sh.do(
            "python3 {}{}".format(
                _tgt,
                _args
            ),
            sudo=True,
            timeout=_timeout
        )

        if _fwd_sh:
            _fwd_sh.kill()
        _sh.kill()

        return [_name, _out]

    def execute_script_on_active_nodes(self, script_filename, args=None):
        # Prepare script
        _source_path = os.path.join(pkg_dir, 'scripts', script_filename)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            script_filename
        )
        # handle results for each node
        _script_arguments = " ".join(args) if args else ""
        if _script_arguments:
            _script_arguments = " " + _script_arguments
        self.not_responded = []
        _results = {}
        logger_cli.debug(
            "... running '{}' on active nodes, {} worker threads".format(
                script_filename,
                self.env_config.threads
            )
        )
        # Workers pool
        pool = Pool(self.env_config.threads)

        # init the parameters
        # node_name,
        # src_path,
        # tgt_path,
        # conf,
        # args
        _params = []
        _port = 10022
        for node in self._active:
            # build parameter blocks
            _p_list = [
                node,
                _source_path,
                _target_path,
                self.env_config,
                _script_arguments,
                _port
            ]
            _params.append(_p_list)
            _port += 1

        _progress = Progress(len(_params))
        results = pool.imap_unordered(self._ssh_exec_script, _params)

        for ii in enumerate(results, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results[ii[1][0]] = ii[1][1]
            _progress.write_progress(ii[0])

        _progress.end()
        pool.close()
        pool.join()

        # return path on nodes, just in case
        return _results

    def prepare_json_on_node(self, node, _dict, filename):
        # this function assumes that all folders are created
        _dumps = json.dumps(_dict, indent=2).splitlines()

        _source_path = create_temp_file_with_content(_dumps)
        _target_path = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder,
            filename
        )
        _folder = os.path.join(
            self.env_config.kube_node_homepath,
            self.env_config.kube_scripts_folder
        )
        _check = "echo $(if [[ -s '{}' ]]; then echo True; " \
                 "else echo False; fi)"
        _fwd_sh, _sh = self.node_shell(
            node,
            use_sudo=False
        )

        # check if folder exists
        _folder_exists = utils.to_bool(
            _sh.do(_check.format(_folder))
        )
        if not _folder_exists:
            _sh.do("mkdir " + _folder)
        logger_cli.debug(
            "... create data on node '{}':'{}'".format(node, _target_path)
        )
        _code, _r, _e = _sh.scp(
            _source_path,
            _sh.get_host_path(_target_path),
        )
        # handle error code
        if _code:
            logger_cli.warn(
                "Error in scp:\n"
                "\tstdout:'{}'\n"
                "\tstderr:'{}'".format(_r, _e)
            )

        _fwd_sh.kill()
        _sh.kill()
        return _target_path

    def prepare_daemonset(self, template_filename):
        # load template
        _yaml_file = os.path.join(pkg_dir, 'templates', template_filename)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _ds = {}
        with open(_yaml_file) as dsFile:
            _ds = yaml.load(dsFile, Loader=yaml.SafeLoader)

        # Add scripts to pod template as volumeMounts
        _tspec = _ds['spec']['template']['spec']
        _tspec['containers'][0]['volumeMounts'] = [
            {
                "name": "scripts",
                "mountPath": os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder
                )
            }
        ]

        _tspec['volumes'] = [
            {
                "name": "scripts",
                "configMap": {
                    "name": self._configmap_name
                }
            }
        ]

        # create daemonset
        logger_cli.debug("... preparing daemonset")
        _ds = self.kube.prepare_daemonset_from_yaml(self._namespace, _ds)
        # Save prepared daemonset
        self.prepared_daemonsets.append(_ds)
        # return it
        return _ds
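
    # Usage sketch (illustrative; the template name is a made-up example):
    # typical daemonset lifecycle used by the helpers defined below:
    #
    #   _ds = self.prepare_daemonset("daemonset.yaml.j2")
    #   if self.wait_for_daemonset(_ds):
    #       _out = self.execute_cmd_on_daemon_set(_ds, "uname -r")
    #   self.delete_daemonset(_ds)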

    def wait_for_daemonset(self, ds, timeout=120):
        # iteration timeout
        _sleep_time = 5
        _timeout = timeout

        # query daemonset and check that desired=scheduled=ready
        _ds = self.kube.get_daemon_set_by_name(
            ds.metadata.namespace,
            ds.metadata.name
        )

        _total = len(self.nodes)
        # _scheduled = _ds.status.scheduled
        # _ready = _ds.status.ready

        # Init Progress bar to show daemonset readiness
        _progress = Progress(_total)
        while _timeout > 0:
            # get new status
            _ds = self.kube.get_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
            _desired = _ds.status.desired_number_scheduled
            _scheduled = _ds.status.current_number_scheduled
            _ready = _ds.status.number_ready
            _updated = _ds.status.updated_number_scheduled
            # print it
            _progress.write_progress(
                _ready,
                note="desired: {}, scheduled: {}, ready: {},"
                     " up-to-date: {}".format(
                    _desired,
                    _scheduled,
                    _ready,
                    _updated
                )
            )

            # check values and return
            # In case of Update, also checking _updated value
            if _ready == _updated and _ready == _total:
                # close progress bar class
                _progress.end()
                logger_cli.debug("... daemonset is ready")
                return True
            # iterate
            _timeout -= _sleep_time
            # wait
            sleep(_sleep_time)

        # timed out
        _progress.end()
        # log it
        logger_cli.error("Timed out waiting for Daemonset to be ready")
        return False

    def exec_script_on_target_pod(self, pod_name, script_filename, args=None):
        """
        Run script from configmap on target pod assuming it is present
        """
        _arguments = args if args else []
        _cmd = [
            "python3",
            os.path.join(
                "/",
                self.env_config.kube_scripts_folder,
                script_filename
            )
        ] + _arguments
        _result = self.kube.exec_on_target_pod(
            _cmd,
            pod_name,
            self._namespace,
            strict=True
        )
        return _result

    def exec_cmd_on_target_pod(self, pod_name, ns, command_str):
        """
        Run a command on the target pod assuming it is present
        """
        _result = self.kube.exec_on_target_pod(
            command_str,
            pod_name,
            ns,
            strict=True
        )
        return _result

    def execute_cmd_on_daemon_set(
            self,
            ds,
            cmd,
            _args=None,
            is_script=False
    ):
        """
        Query daemonset for pods and execute script on all of them
        """
        _results = self.exec_cmd_on_pods(
            self.kube.get_pods_for_daemonset(ds),
            cmd,
            _args=_args,
            is_script=is_script
        )
        # Update results
        _ds_results = {}
        for _n, _, _v in _results:
            _ds_results[_n] = _v
        return _ds_results

    def exec_on_labeled_pods_and_ns(self, label_str, cmd, _args=None, ns=None):
        if not ns:
            ns = self._namespace
        _results = self.exec_cmd_on_pods(
            self.kube.list_pods(ns, label_str=label_str),
            cmd,
            _args=_args
        )
        _pod_results = {}
        for _, _p, _v in _results:
            _pod_results[_p] = _v
        return _pod_results

    def exec_cmd_on_pods(
            self,
            pod_list,
            cmd,
            _args=None,
            is_script=False
    ):
        def _kube_exec_on_pod(plist):
            return [
                plist[1],  # node
                plist[3],  # pod name
                plist[0].kube.exec_on_target_pod(  # pointer to function
                    plist[4],  # cmd
                    plist[3],  # pod name
                    plist[2],  # namespace
                    strict=True,
                    _request_timeout=120,
                    arguments=plist[5]
                )
            ]

        # Create map for threads: [[node_name, ns, pod_name, cmd]...]
        logger_cli.debug(
            "... running script on {} pods using {} threads at a time".format(
                len(pod_list.items),
                self.env_config.threads
            )
        )
        _plist = []
        _arguments = _args if _args else ""
        if is_script:
            _cmd = [
                "python3",
                os.path.join(
                    "/",
                    self.env_config.kube_scripts_folder,
                    cmd
                ),
                _arguments
            ]
            _cmd = " ".join(_cmd)
        else:
            # decide if we are to wrap it to bash
            if '|' in cmd:
                _cmd = "bash -c"
                _arguments = cmd
            else:
                _cmd = cmd
        for item in pod_list.items:
            _plist.append(
                [
                    self,
                    item.spec.node_name,
                    item.metadata.namespace,
                    item.metadata.name,
                    _cmd,
                    _arguments
                ]
            )

        # map func and cmd
        pool = Pool(self.env_config.threads)
        _results = []
        self.not_responded = []
        # create result list
        _progress = Progress(len(_plist))
        ret = pool.imap_unordered(_kube_exec_on_pod, _plist)

        for ii in enumerate(ret, start=1):
            if not ii[1][1]:
                self.not_responded.append(ii[1][0])
            else:
                _results.append(ii[1])
            _progress.write_progress(ii[0])

        _progress.end()
        pool.close()
        pool.join()
        logger_cli.debug(
            "... done, {} total outputs; {} not responded".format(
                len(_results),
                len(self.not_responded)
            )
        )
        return _results

    def delete_daemonset(self, ds):
        # Try to delete daemonset
        try:
            _r = self.kube.delete_daemon_set_by_name(
                ds.metadata.namespace,
                ds.metadata.name
            )
        except Exception as e:
            logger_cli.warning("Failed to delete daemonset '{}': {}".format(
                ds.metadata.name,
                e.reason
            ))
            _r = None
        return _r

    def get_pod_name_in_daemonset_by_node(self, nodename, daemonset):
        _podname = None
        _pods = self.kube.get_pods_for_daemonset(daemonset)
        for item in _pods.items:
            if item.spec.node_name == nodename:
                _podname = item.metadata.name

        return _podname

    def prepare_json_in_pod(self, podname, namespace, targets, filename):
        # Iterate pods in daemonset and prepare json file on each one
        _target_path = os.path.join(
            "/",
            "tmp",
            filename
        )
        # check folder will probably not needed as the daemonset links
        # configmap there on creation
        # _folder = os.path.join(
        #     self.env_config.kube_node_homepath,
        #     self.env_config.kube_scripts_folder
        # )
        # prepare data
        buffer = json.dumps(targets, indent=2).encode('utf-8')

        # write data to pod using fancy websocket function
        self.kube.put_string_buffer_to_pod_as_textfile(
            podname,
            namespace,
            buffer,
            _target_path
        )

        # TODO: Exception handling

        return _target_path

    def get_cmd_for_nodes(self, cmd, target_key, target_dict=None, nodes=None):
        """Function runs command on daemonset and parses the result into place
        or into the dict structure provided

        :return: no return value, data published internally
        """
        logger_cli.debug(
            "... collecting results for '{}'".format(cmd)
        )
        if target_dict:
            _nodes = target_dict
        else:
            _nodes = self.nodes
        # Dirty way to get daemonset that was used in checker and not deleted
        _ds = self.prepared_daemonsets[0]
        _result = self.execute_cmd_on_daemon_set(_ds, cmd)
        for node, data in _nodes.items():

            if node in self.skip_list:
                logger_cli.debug(
                    "... '{}' skipped while collecting '{}'".format(
                        node,
                        cmd
                    )
                )
                continue
            # Prepare target key
            if target_key not in data:
                data[target_key] = None
            # Save data
            if data['status'] in [NODE_DOWN, NODE_SKIP]:
                data[target_key] = None
            elif node not in _result:
                continue
            elif not _result[node]:
                logger_cli.debug(
                    "... '{}' not responded after '{}'".format(
                        node,
                        self.env_config.salt_timeout
                    )
                )
                data[target_key] = None
            else:
                data[target_key] = _result[node]

    def prepare_benchmark_agent(self, index, path, sc, size, template):
        # Load pod template
        _yaml_file = os.path.join(pkg_dir, 'templates', template)
        logger_cli.debug("... loading template '{}'".format(_yaml_file))
        _pod = {}
        with open(_yaml_file) as podFile:
            _pod = yaml.load(podFile, Loader=yaml.SafeLoader)

        # set namings
        _n = "cfgagent-{:02}".format(index)
        _pvc_n = "cfgagent-pvc-{:02}".format(index)
        # _pv_n = "cfgagent-pv-{:02}".format(index)

        _pod["metadata"]["name"] = _n
        _pod["metadata"]["labels"]["name"] = _n
        # replace volumeMounts
        for _c in _pod["spec"]["containers"]:
            for _mnt in _c["volumeMounts"]:
                if "placeholder" in _mnt["name"]:
                    # _mnt["name"] = _pv_n
                    _mnt["mountPath"] = path
        # replace claim
        for _v in _pod["spec"]["volumes"]:
            if "cfgagent-pv" in _v["name"]:
                # _v["name"] = _pv_n
                _v["persistentVolumeClaim"]["claimName"] = _pvc_n

        # init volume resources
        # _pv_object = self.kube.init_pv_resource(_pv_n, sc, size, path)
        # _pv = self.kube.prepare_pv(_pv_object)
        # update size of the volume to be 15% larger
        _pvc_object = self.kube.init_pvc_resource(_pvc_n, sc, size)
        _pvc = self.kube.prepare_pvc(_pvc_object)

        # start pod
        _pod = self.kube.prepare_pod_from_yaml(_pod)

        # return _pod, _pv, _pvc
        return _pod, _pvc

    def expose_benchmark_agent(self, agent):
        return self.kube.expose_pod_port(agent, 8765)

    def cleanup_resource_by_name(self, res_type, name, ns=None, wait=False):
        """Cleans up a resource using string res_type and the ns/name

        Args:
            res_type (string): resource type name: pod, pv, pvc, svc
            name (string): resource name to cleanup
            ns (string, optional): Namespace to use. Default is 'qa-space'

        return: (Bool) Is Success?
        """
        # fill defaults
        if not ns:
            ns = self._namespace
        # Handle res_type errors and choose resource type
        if not res_type:
            logger_cli.debug(
                "... resource type invalid: '{}'".format(res_type)
            )
            return False
        elif not name:
            logger_cli.debug("... resource name invalid: '{}'".format(name))
            return False
        elif res_type == "svc":
            # Delete service
            logger_cli.info("-> deleting svc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_service(name, ns)
            # TODO: Check if successful
        elif res_type == "pod":
            # Delete a pod
            logger_cli.info("-> deleting pod {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_pod(name, ns)
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pvc":
            logger_cli.info("-> deleting pvc {}/{}".format(ns, name))
            self.kube.CoreV1.delete_namespaced_persistent_volume_claim(
                name,
                ns
            )
            if wait:
                self.kube.wait_for_phase(res_type, name, ns, ["Terminated"])
        elif res_type == "pv":
            logger_cli.info("-> deleting pv {}/{}".format(ns, name))
            self.kube.CoreV1.delete_persistent_volume(name)
            if wait:
                self.kube.wait_for_phase(res_type, name, None, ["Terminated"])

        return True
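
    # Usage sketch (illustrative; the agent names mirror the "cfgagent-{:02}"
    # naming produced by prepare_benchmark_agent above for index 1):
    #
    #   self.cleanup_resource_by_name("svc", "cfgagent-01")
    #   self.cleanup_resource_by_name("pod", "cfgagent-01", wait=True)
    #   self.cleanup_resource_by_name("pvc", "cfgagent-pvc-01", wait=True)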

    def get_resource_phase_by_name(self, typ, name, ns="qa-space"):
        if typ == "pod":
            _t = self.kube.get_pod_by_name_and_ns(name, ns)
        elif typ == "svc":
            _t = self.kube.get_svc_by_name_and_ns(name, ns)
        elif typ == "pvc":
            _t = self.kube.get_pvc_by_name_and_ns(name, ns)
        elif typ == "pv":
            _t = self.kube.get_pv_by_name(name)
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None

        if _t:
            return _t.status.phase
        else:
            return None

    def list_resource_names_by_type_and_ns(self, typ, ns="qa-space"):
        if typ == "pod":
            _items = self.kube.list_pods(ns)
        elif typ == "svc":
            _items = self.kube.list_svc(ns)
        elif typ == "pvc":
            _items = self.kube.list_pvc(ns)
        elif typ == "pv":
            _items = self.kube.list_pv()
        else:
            logger_cli.error("ERROR: '{}' is not supported yet".format(typ))
            return None
        return [[i.metadata.namespace, i.metadata.name] for i in _items.items]

    def get_logs_for_pod(self, podname, namespace):
        return self.kube.get_pod_logs(podname, namespace)