blob: b22566d8b0a9fd77a0d899214cfb250e6c2e1c20 [file] [log] [blame]
#    Copyright 2016 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
14
15import random
Dennis Dmitriev010f4cd2016-11-01 20:43:51 +020016import time
Dennis Dmitriev6f59add2016-10-18 13:45:27 +030017
18from devops.helpers import helpers
19from devops.helpers import ssh_client
20from paramiko import rsakey
Dennis Dmitriev99b26fe2017-04-26 12:34:44 +030021import yaml
Dennis Dmitriev6f59add2016-10-18 13:45:27 +030022
23from tcp_tests import logger
24from tcp_tests.helpers import utils
25
26LOG = logger.logger
27
28
class UnderlaySSHManager(object):
    """Keep the list of SSH access credentials to Underlay nodes.

    This object is initialized using config.underlay.ssh.

    :param config: config object; ``config.underlay.ssh`` is a JSONList
        of SSH access credentials for nodes:
        [
          {
            node_name: node1,
            address_pool: 'public-pool01',
            host: ,
            port: ,
            keys: [],
            keys_source_host: None,
            login: ,
            password: ,
            roles: [],
          },
          {
            node_name: node1,
            address_pool: 'private-pool01',
            host:
            port:
            keys: []
            keys_source_host: None,
            login:
            password:
            roles: [],
          },
          {
            node_name: node2,
            address_pool: 'public-pool01',
            keys_source_host: node1
            ...
          }
          ,
          ...
        ]

    self.node_names(): list of node names registered in underlay.
    self.remote(): SSHClient object by a node name (w/wo address pool)
                   or by a hostname.
    """
    # Class-level placeholders; __init__ replaces the two public ones with
    # per-instance containers when they are still None.
    __config = None
    config_ssh = None
    config_lvm = None

    def __init__(self, config):
        """Read config.underlay.ssh object

        :param config: config object providing ``underlay.ssh`` (list of
            SSH credential dicts, may be None). It is also read later by
            read_template() and execute_commands().
        """
        self.__config = config
        if self.config_ssh is None:
            self.config_ssh = []

        if self.config_lvm is None:
            self.config_lvm = {}

        self.add_config_ssh(self.__config.underlay.ssh)

    @staticmethod
    def __make_ssh_data(ssh):
        """Build the normalized credentials dict stored in config_ssh.

        :param ssh: dict, one entry of config.underlay.ssh
        :rtype: dict
        :raises: KeyError if a required key is missing
        """
        return {
            # Required keys:
            'node_name': ssh['node_name'],
            'host': ssh['host'],
            'login': ssh['login'],
            'password': ssh['password'],
            # Optional keys:
            'address_pool': ssh.get('address_pool', None),
            'port': ssh.get('port', None),
            # NOTE: when 'keys' is present this is the caller's own list
            # object; add_config_ssh() may extend it in place.
            'keys': ssh.get('keys', []),
            'roles': ssh.get('roles', []),
        }

    def add_config_ssh(self, config_ssh):
        """Register SSH access credentials in self.config_ssh

        If an entry has a non-empty 'keys_source_host', the private key
        found on that (already registered) node is appended to the keys
        of the entry.

        :param config_ssh: list of dicts (see the class docstring) or None
        """
        if config_ssh is None:
            config_ssh = []

        for ssh in config_ssh:
            ssh_data = self.__make_ssh_data(ssh)

            # 'keys_source_host: None' is a documented value and means
            # "no source host", so check the value, not only the key.
            source_node = ssh.get('keys_source_host')
            if source_node:
                remote = self.remote(source_node)
                ssh_data['keys'].extend(self.__get_keys(remote))

            self.config_ssh.append(ssh_data)

    def remove_config_ssh(self, config_ssh):
        """Remove SSH access credentials from self.config_ssh

        Entries are matched by equality of the normalized dict.

        :param config_ssh: list of dicts (see the class docstring) or None
        :raises: ValueError if an entry is not registered
        """
        if config_ssh is None:
            config_ssh = []

        for ssh in config_ssh:
            self.config_ssh.remove(self.__make_ssh_data(ssh))

    def __get_keys(self, remote):
        """Read the RSA private key './.ssh/id_rsa' from the remote host

        :param remote: SSHClient object
        :rtype: list of paramiko RSAKey objects (empty if no key file)
        """
        keys = []
        remote.execute('cd ~')
        key_string = './.ssh/id_rsa'
        if remote.exists(key_string):
            with remote.open(key_string) as f:
                keys.append(rsakey.RSAKey.from_private_key(f))
        return keys

    def __ssh_data(self, node_name=None, host=None, address_pool=None):
        """Find the registered credentials by host or by node name

        If 'host' is specified, 'node_name' and 'address_pool' are ignored.
        Otherwise the FIRST entry with the matching 'node_name' (and the
        matching 'address_pool', if it is specified) is returned.

        :rtype: dict
        :raises: Exception if nothing matches
        """
        ssh_data = None

        if host is not None:
            for ssh in self.config_ssh:
                if host == ssh['host']:
                    ssh_data = ssh
                    break

        elif node_name is not None:
            for ssh in self.config_ssh:
                if node_name != ssh['node_name']:
                    continue
                if address_pool is None or address_pool == ssh['address_pool']:
                    ssh_data = ssh
                    # Stop at the first match, as documented in remote()
                    break

        if ssh_data is None:
            raise Exception('Auth data for node was not found using '
                            'node_name="{}" , host="{}" , address_pool="{}"'
                            .format(node_name, host, address_pool))
        return ssh_data

    def node_names(self):
        """Get list of node names registered in config.underlay.ssh"""

        names = []  # List is used to keep the original order of names
        for ssh in self.config_ssh:
            if ssh['node_name'] not in names:
                names.append(ssh['node_name'])
        return names

    def enable_lvm(self, lvmconfig):
        """Method for enabling lvm on hosts in environment

        For every registered node that has an entry in lvmconfig, install
        the LVM packages and create the physical volume, the 'default'
        volume group and a thin pool on the configured device.

        :param lvmconfig: dict with ids or device' names of lvm storage,
            keyed by node name: {'id': ...} or {'device': ...}
        :raises: devops.error.DevopsCalledProcessError,
            devops.error.TimeoutError, AssertionError, ValueError
        """
        def get_actions(lvm_id):
            return [
                "systemctl enable lvm2-lvmetad.service",
                "systemctl enable lvm2-lvmetad.socket",
                "systemctl start lvm2-lvmetad.service",
                "systemctl start lvm2-lvmetad.socket",
                "pvcreate {} && pvs".format(lvm_id),
                "vgcreate default {} && vgs".format(lvm_id),
                "lvcreate -L 1G -T default/pool && lvs",
            ]

        lvmpackages = ["lvm2", "liblvm2-dev", "thin-provisioning-tools"]
        for node_name in self.node_names():
            lvm = lvmconfig.get(node_name, None)
            if not lvm:
                # No LVM storage is configured for this node
                continue
            if 'id' in lvm:
                lvmdevice = '/dev/disk/by-id/{}'.format(lvm['id'])
            elif 'device' in lvm:
                lvmdevice = '/dev/{}'.format(lvm['device'])
            else:
                raise ValueError("Unknown LVM device type")

            self.apt_install_package(
                packages=lvmpackages, node_name=node_name, verbose=True)
            for command in get_actions(lvmdevice):
                self.sudo_check_call(command, node_name=node_name,
                                     verbose=True)
        self.config_lvm = dict(lvmconfig)

    def host_by_node_name(self, node_name, address_pool=None):
        """Get the 'host' value registered for the node name

        :param node_name: str
        :param address_pool: str, optional
        :raises: Exception if the node is not registered
        """
        ssh_data = self.__ssh_data(node_name=node_name,
                                   address_pool=address_pool)
        return ssh_data['host']

    def remote(self, node_name=None, host=None, address_pool=None):
        """Get SSHClient by a node name or hostname.

        One of the following arguments should be specified:
        - host (str): IP address or hostname. If specified, 'node_name' is
                      ignored.
        - node_name (str): Name of the node stored to config.underlay.ssh
        - address_pool (str): optional for node_name.
                              If None, use the first matched node_name.
        """
        ssh_data = self.__ssh_data(node_name=node_name, host=host,
                                   address_pool=address_pool)
        return ssh_client.SSHClient(
            host=ssh_data['host'],
            port=ssh_data['port'] or 22,  # default SSH port
            username=ssh_data['login'],
            password=ssh_data['password'],
            private_keys=ssh_data['keys'])

    def check_call(
            self, cmd,
            node_name=None, host=None, address_pool=None,
            verbose=False, timeout=None,
            error_info=None,
            expected=None, raise_on_err=True):
        """Execute command on the node_name/host and check for exit code

        :type cmd: str
        :type node_name: str
        :type host: str
        :type verbose: bool
        :type timeout: int
        :type error_info: str
        :type expected: list
        :type raise_on_err: bool
        :rtype: list stdout
        :raises: devops.error.DevopsCalledProcessError
        """
        remote = self.remote(node_name=node_name, host=host,
                             address_pool=address_pool)
        return remote.check_call(
            command=cmd, verbose=verbose, timeout=timeout,
            error_info=error_info, expected=expected,
            raise_on_err=raise_on_err)

    def apt_install_package(self, packages=None, node_name=None, host=None,
                            **kwargs):
        """Method to install packages on ubuntu nodes

        :type packages: list
        :type node_name: str
        :type host: str
        :raises: devops.error.DevopsCalledProcessError,
            devops.error.TimeoutError, AssertionError, ValueError

        Other params of check_call and sudo_check_call are allowed
        """
        # 'expected' applies to the install command only; pgrep below
        # uses its own list of expected exit codes.
        expected = kwargs.pop('expected', None)
        if not packages or not isinstance(packages, list):
            raise ValueError("packages list should be provided!")
        install = "apt-get install -y {}".format(" ".join(packages))
        # Should wait until other 'apt' jobs are finished
        pgrep_expected = [0, 1]
        pgrep_command = "pgrep -a -f apt"
        helpers.wait(
            # The wait ends when pgrep returns 1
            lambda: (self.check_call(
                pgrep_command, expected=pgrep_expected, host=host,
                node_name=node_name, **kwargs).exit_code == 1
            ), interval=30, timeout=1200,
            timeout_msg="Timeout reached while waiting for apt lock"
        )
        # Install packages
        self.sudo_check_call("apt-get update", node_name=node_name, host=host,
                             **kwargs)
        self.sudo_check_call(install, expected=expected, node_name=node_name,
                             host=host, **kwargs)

    def sudo_check_call(
            self, cmd,
            node_name=None, host=None, address_pool=None,
            verbose=False, timeout=None,
            error_info=None,
            expected=None, raise_on_err=True):
        """Execute command with sudo on node_name/host and check for exit code

        :type cmd: str
        :type node_name: str
        :type host: str
        :type verbose: bool
        :type timeout: int
        :type error_info: str
        :type expected: list
        :type raise_on_err: bool
        :rtype: list stdout
        :raises: devops.error.DevopsCalledProcessError
        """
        remote = self.remote(node_name=node_name, host=host,
                             address_pool=address_pool)
        with remote.get_sudo(remote):
            return remote.check_call(
                command=cmd, verbose=verbose, timeout=timeout,
                error_info=error_info, expected=expected,
                raise_on_err=raise_on_err)

    def dir_upload(self, host, source, destination):
        """Upload local directory content to remote host

        :param host: str, remote node name (looked up as 'node_name')
        :param source: str, local directory path
        :param destination: str, directory path on the remote node
        """
        with self.remote(node_name=host) as remote:
            remote.upload(source, destination)

    def get_random_node(self):
        """Get random node name

        :return: str, name of node
        """
        return random.choice(self.node_names())

    def yaml_editor(self, file_path, node_name=None, host=None,
                    address_pool=None):
        """Returns an initialized YamlEditor instance for context manager

        Usage (with 'underlay' fixture):

        # Local YAML file
        with underlay.yaml_editor('/path/to/file') as editor:
            editor.content[key] = "value"

        # Remote YAML file on TCP host
        with underlay.yaml_editor('/path/to/file',
                                  host=config.tcp.tcp_host) as editor:
            editor.content[key] = "value"
        """
        # Local YAML file
        if node_name is None and host is None:
            return utils.YamlEditor(file_path=file_path)

        # Remote YAML file
        ssh_data = self.__ssh_data(node_name=node_name, host=host,
                                   address_pool=address_pool)
        return utils.YamlEditor(
            file_path=file_path,
            host=ssh_data['host'],
            port=ssh_data['port'] or 22,
            username=ssh_data['login'],
            password=ssh_data['password'],
            private_keys=ssh_data['keys'])

    def read_template(self, file_path):
        """Read yaml as a jinja template

        The template is rendered with 'config' available as an option.

        :param file_path: str, path to the template file
        :return: object parsed from the rendered YAML
        """
        options = {
            'config': self.__config,
        }
        template = utils.render_template(file_path, options=options)
        # NOTE(review): yaml.load() is called without an explicit Loader.
        # It can construct arbitrary objects from the document and newer
        # PyYAML releases require the Loader argument; consider
        # yaml.safe_load() if the templates use only plain YAML.
        return yaml.load(template)

    def ensure_running_service(self, service_name, host, check_cmd,
                               state_running='start/running'):
        """Check if the service_name running or try to restart it

        :param service_name: name of the service that will be checked
        :param host: host on which the service will be checked
        :param check_cmd: shell command to ensure that the service is running
        :param state_running: string for check the service state
        """
        cmd = "service {0} status | grep -q '{1}'".format(
            service_name, state_running)
        with self.remote(host=host) as remote:
            result = remote.execute(cmd)
            if result.exit_code != 0:
                LOG.info("{0} is not in running state on the node {1},"
                         " trying to start".format(service_name, host))
                cmd = ("service {0} stop;"
                       " sleep 3; killall -9 {0};"
                       "service {0} start; sleep 5;"
                       .format(service_name))
                remote.execute(cmd)

                # check_cmd is executed twice and its result is not
                # inspected.
                # NOTE(review): presumably the first call after a restart
                # may fail - confirm.
                remote.execute(check_cmd)
                remote.execute(check_cmd)

    def execute_commands(self, commands, label="Command"):
        """Execute a sequence of commands

        Main purpose is to implement workarounds for salt formulas like:
        - exit_code == 0 when there are actual failures
        - salt_master and/or salt_minion stop working after executing a formula
        - a formula fails at first run, but completes at next runs

        :param label: label of the current sequence of the commands, for log
        :param commands: list of dicts with the following data:
        commands = [
            ...
            {
                # Required:
                'cmd': 'shell command(s) to run',
                'node_name': 'name of the node to run the command(s)',
                # Optional:
                'description': 'string with a readable command description',
                'retry': {
                    'count': int,  # How many times should be run the command
                                   # until success
                    'delay': int,  # Delay between tries in seconds
                },
                'skip_fail': bool  # If True - continue with the next step
                                   # without failure even if count number
                                   # is reached.
                                   # If False - raise an exception (default)
            },
            ...
        ]
        :raises: Exception if a step did not succeed in 'count' tries and
            'skip_fail' is False
        """
        for n, step in enumerate(commands):
            # Required fields
            cmd = step.get('cmd')
            node_name = step.get('node_name')
            # Optional fields
            description = step.get('description', cmd)
            retry = step.get('retry', {'count': 1, 'delay': 1})
            retry_count = retry.get('count', 1)
            retry_delay = retry.get('delay', 1)
            skip_fail = step.get('skip_fail', False)

            msg = "[ {0} #{1} ] {2}".format(label, n+1, description)
            LOG.info("\n\n{0}\n{1}".format(msg, '=' * len(msg)))

            with self.remote(node_name=node_name) as remote:

                # x counts the remaining tries down to 1 (the last one)
                for x in range(retry_count, 0, -1):
                    time.sleep(3)
                    result = remote.execute(cmd, verbose=True)

                    # Workaround of exit code 0 from salt in case of failures
                    failed = 0
                    for s in result['stdout']:
                        if s.startswith("Failed:"):
                            failed += int(s.split("Failed:")[1])

                    if result.exit_code != 0:
                        time.sleep(retry_delay)
                        LOG.info(" === RETRY ({0}/{1}) ========================="
                                 .format(x-1, retry_count))
                    elif failed != 0:
                        LOG.error(" === SALT returned exit code = 0 while "
                                  "there are failed modules!  ===")
                        LOG.info(" === RETRY ({0}/{1}) ======================="
                                 .format(x-1, retry_count))
                    else:
                        if self.__config.salt.salt_master_host != '0.0.0.0':
                            # Workarounds for crashed services
                            self.ensure_running_service(
                                "salt-master",
                                self.__config.salt.salt_master_host,
                                "salt-call pillar.items",
                                'active (running)')  # Hardcoded for now
                            self.ensure_running_service(
                                "salt-minion",
                                self.__config.salt.salt_master_host,
                                "salt 'cfg01*' pillar.items",
                                "active (running)")  # Hardcoded for now
                        break

                    # The comparison is kept as '== False' on purpose: a
                    # value such as None must not trigger the exception.
                    if x == 1 and skip_fail == False:  # noqa: E712
                        # In the last retry iteration, raise an exception
                        raise Exception("Step '{0}' failed"
                                        .format(description))