David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 1 | # Copyright 2013 Quanta Research Cambridge, Inc. |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | # you may not use this file except in compliance with the License. |
| 5 | # You may obtain a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | # See the License for the specific language governing permissions and |
| 13 | # limitations under the License. |
| 14 | |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 15 | import multiprocessing |
Marc Koderer | 8f940ab | 2013-09-25 17:31:50 +0200 | [diff] [blame] | 16 | import os |
Marc Koderer | 3414d73 | 2013-07-31 08:36:36 +0200 | [diff] [blame] | 17 | import signal |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 18 | import time |
| 19 | |
Doug Hellmann | 583ce2c | 2015-03-11 14:55:46 +0000 | [diff] [blame] | 20 | from oslo_log import log as logging |
| 21 | from oslo_utils import importutils |
Matthew Treinish | 7142668 | 2015-04-23 11:19:38 -0400 | [diff] [blame] | 22 | import six |
llg8212 | 43b2050 | 2014-02-22 10:32:49 +0800 | [diff] [blame] | 23 | |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 24 | from tempest import clients |
Jamie Lennox | 1535017 | 2015-08-17 10:54:25 +1000 | [diff] [blame] | 25 | from tempest.common import cred_client |
Andrea Frittoli (andreaf) | 290b3e1 | 2015-10-08 10:25:02 +0100 | [diff] [blame] | 26 | from tempest.common import credentials_factory as credentials |
Fei Long Wang | d39431f | 2015-05-14 11:30:48 +1200 | [diff] [blame] | 27 | from tempest.common.utils import data_utils |
Matthew Treinish | 88f49ef | 2014-01-29 18:36:27 +0000 | [diff] [blame] | 28 | from tempest import config |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 29 | from tempest import exceptions |
Andrea Frittoli (andreaf) | db9672e | 2016-02-23 14:07:24 -0500 | [diff] [blame] | 30 | from tempest.lib.common import ssh |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 31 | from tempest.stress import cleanup |
| 32 | |
Matthew Treinish | 88f49ef | 2014-01-29 18:36:27 +0000 | [diff] [blame] | 33 | CONF = config.CONF |
| 34 | |
Marc Koderer | b714de5 | 2013-08-08 09:21:46 +0200 | [diff] [blame] | 35 | LOG = logging.getLogger(__name__) |
Marc Koderer | 3414d73 | 2013-07-31 08:36:36 +0200 | [diff] [blame] | 36 | processes = [] |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 37 | |
| 38 | |
def do_ssh(command, host, ssh_user, ssh_key=None):
    """Execute ``command`` on ``host`` through an ssh.Client.

    The client is built for ``host`` and ``ssh_user``; ``ssh_key`` is handed
    over as ``key_filename``.

    Returns whatever ``exec_command`` returns.  When the command fails with
    SSHExecCommandFailed the failure is logged and None is returned.
    """
    client = ssh.Client(host, ssh_user, key_filename=ssh_key)
    try:
        result = client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        message = ('do_ssh raise exception. command:%s, host:%s.'
                   % (command, host))
        LOG.error(message)
        result = None
    return result
| 47 | |
| 48 | |
def _get_compute_nodes(controller, ssh_user, ssh_key=None):
    """Returns a list of active compute nodes.

    List is generated by running nova-manage on the controller.

    :param controller: host on which the nova-manage command is executed
    :param ssh_user: user name handed to do_ssh
    :param ssh_key: optional private key file handed to do_ssh
    :returns: list with the second column of every output line whose fifth
              column is ":-)"; an empty list when the command produced no
              output or failed
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller, ssh_user, ssh_key)
    if not output:
        return []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    nodes = []
    for line in output.split('\n'):
        words = line.split()
        # words[4] is read below, so a line needs at least five columns;
        # shorter (truncated or unexpected) lines are ignored instead of
        # raising IndexError.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
| 66 | |
| 67 | |
def _has_error_in_logs(logfiles, nodes, ssh_user, ssh_key=None,
                       stop_on_error=False):
    """Detect errors in the given log files on the given nodes.

    Runs ``egrep "ERROR|TRACE" <logfiles>`` on every node through do_ssh and
    logs whatever is matched.

    :param logfiles: log file path(s) interpolated into the egrep command
    :param nodes: iterable of hosts to inspect
    :param ssh_user: user name handed to do_ssh
    :param ssh_key: optional private key file handed to do_ssh
    :param stop_on_error: when True, stop at the first node with matches
    :returns: True if at least one node produced matches, False otherwise
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    ret = False
    for node in nodes:
        errors = do_ssh(grep, node, ssh_user, ssh_key)
        # do_ssh returns None when the remote command fails, so test for
        # truthiness rather than calling len() on the result.
        if errors:
            LOG.error('%s: %s', node, errors)
            ret = True
            if stop_on_error:
                break
    return ret
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 81 | |
| 82 | |
def sigchld_handler(signalnum, frame):
    """Signal handler (only active if stop_on_error is True).

    Looks for a registered child that is no longer alive and exited with a
    non-zero exit code.  On the first such child the default disposition of
    ``signalnum`` is restored and all processes are terminated.
    """
    for entry in processes:
        child = entry['process']
        if child.is_alive() or child.exitcode == 0:
            # Still running, or finished cleanly: nothing to do for this one.
            continue
        signal.signal(signalnum, signal.SIG_DFL)
        terminate_all_processes()
        return
Marc Koderer | 3414d73 | 2013-07-31 08:36:36 +0200 | [diff] [blame] | 91 | |
| 92 | |
def terminate_all_processes(check_interval=20):
    """Goes through the process list and terminates all child processes.

    Every live child is asked to terminate first.  The children then get up
    to ``check_interval`` seconds in total to exit; any child still alive
    after that is sent SIGKILL.  All children are joined before returning.

    :param check_interval: maximum number of seconds to wait for the
                           children to exit before SIGKILL is sent
    """
    LOG.info("Stopping all processes.")
    for process in processes:
        child = process['process']
        if child.is_alive():
            try:
                child.terminate()
            except Exception:
                # Best effort: a child that cannot be terminated here is
                # handled by the SIGKILL pass below.
                LOG.debug("terminate() failed for process %s", child.pid,
                          exc_info=True)

    # Wait against one shared deadline instead of sleeping unconditionally,
    # so no time is wasted when the children exit promptly.
    deadline = time.time() + check_interval
    for process in processes:
        process['process'].join(max(0, deadline - time.time()))

    for process in processes:
        child = process['process']
        if child.is_alive():
            try:
                pid = child.pid
                LOG.warning("Process %d hangs. Send SIGKILL." % pid)
                os.kill(pid, signal.SIGKILL)
            except Exception:
                # Best effort: keep going so the remaining children are
                # still killed and joined.
                LOG.debug("SIGKILL failed for process %s", child.pid,
                          exc_info=True)
        child.join()
| 112 | |
| 113 | |
def _create_isolated_manager(admin_manager):
    """Create a fresh project and user and return a Manager for them.

    The identity clients are taken from ``admin_manager`` according to
    CONF.identity.auth_version; the roles listed in CONF.auth.tempest_roles
    are assigned to the new user.
    """
    username = data_utils.rand_name("stress_user")
    tenant_name = data_utils.rand_name("stress_tenant")
    password = "pass"
    if CONF.identity.auth_version == 'v2':
        identity_client = admin_manager.identity_client
        projects_client = admin_manager.tenants_client
        roles_client = admin_manager.roles_client
        users_client = admin_manager.users_client
        domains_client = None
    else:
        identity_client = admin_manager.identity_v3_client
        projects_client = admin_manager.projects_client
        roles_client = admin_manager.roles_v3_client
        users_client = admin_manager.users_v3_client
        domains_client = admin_manager.domains_client
    domain = (identity_client.auth_provider.credentials.
              get('project_domain_name', 'Default'))
    credentials_client = cred_client.get_creds_client(
        identity_client, projects_client, users_client,
        roles_client, domains_client, project_domain_name=domain)
    project = credentials_client.create_project(
        name=tenant_name, description=tenant_name)
    user = credentials_client.create_user(username, password,
                                          project, "email")
    # Add roles specified in config file
    for conf_role in CONF.auth.tempest_roles:
        credentials_client.assign_user_role(user, project, conf_role)
    creds = credentials_client.get_credentials(user, project, password)
    return clients.Manager(credentials=creds)


def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
    """Workload driver. Executes an action function against a nova-cluster.

    :param tests: iterable of dicts describing the actions to run.  Keys
                  read here: 'action' (import path of the test class),
                  'required_services', 'use_admin', 'threads',
                  'use_isolated_tenants' and 'kwargs' (passed to setUp).
    :param duration: run time in seconds; only consulted when ``max_runs``
                     is None
    :param max_runs: handed to every test object; when it is not None the
                     driver waits until all child processes have finished
                     instead of watching the clock
    :param stop_on_error: stop as soon as a child exits with a non-zero
                          exit code, a process reports a failure, or errors
                          are found in the log files
    :returns: 1 if any process reported a failure or errors were found in
              the log files, 0 otherwise
    :raises NotImplementedError: for a test that is not marked 'use_admin'
    """
    admin_manager = credentials.AdminManager()

    ssh_user = CONF.stress.target_ssh_user
    ssh_key = CONF.stress.target_private_key_path
    logfiles = CONF.stress.target_logfiles
    log_check_interval = int(CONF.stress.log_check_interval)
    default_thread_num = int(CONF.stress.default_thread_number_per_action)
    if logfiles:
        controller = CONF.stress.target_controller
        computes = _get_compute_nodes(controller, ssh_user, ssh_key)
        # Remove the existing log files so only new entries are inspected.
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node, ssh_user, ssh_key)
    for test in tests:
        # NOTE(review): a test with an unavailable required service stops
        # the processing of all remaining tests, not just this one. This
        # mirrors the previous behaviour - confirm it is intended.
        if any(not CONF.service_available.get(service)
               for service in test.get('required_services', [])):
            break
        # TODO(andreaf) This has to be reworked to use the credential
        # provider interface. For now only tests marked as 'use_admin' will
        # work.
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            # NotImplemented is a comparison sentinel and is not callable;
            # NotImplementedError is the exception meant here.
            raise NotImplementedError('Non admin tests are not supported')
        for p_number in range(test.get('threads', default_thread_num)):
            if test.get('use_isolated_tenants', False):
                manager = _create_isolated_manager(admin_manager)

            test_obj = importutils.import_class(test['action'])
            test_run = test_obj(manager, max_runs, stop_on_error)

            kwargs = test.get('kwargs', {})
            test_run.setUp(**kwargs)

            LOG.debug("calling Target Object %s",
                      test_run.__class__.__name__)

            # Counters shared between the child process and this driver.
            mp_manager = multiprocessing.Manager()
            shared_statistic = mp_manager.dict()
            shared_statistic['runs'] = 0
            shared_statistic['fails'] = 0

            p = multiprocessing.Process(target=test_run.execute,
                                        args=(shared_statistic,))

            process = {'process': p,
                       'p_number': p_number,
                       'action': test_run.action,
                       'statistic': shared_statistic}

            processes.append(process)
            p.start()
    if stop_on_error:
        # NOTE(mkoderer): only the parent should register the handler
        signal.signal(signal.SIGCHLD, sigchld_handler)
    end_time = time.time() + duration
    had_errors = False
    try:
        while True:
            if max_runs is None:
                # Time-boxed run: stop once the duration has elapsed.
                remaining = end_time - time.time()
                if remaining <= 0:
                    break
            else:
                # Run-count mode: stop once every child has finished.
                remaining = log_check_interval
                if not any(process['process'].is_alive()
                           for process in processes):
                    break

            time.sleep(min(remaining, log_check_interval))
            if stop_on_error:
                if any(proc['statistic']['fails'] > 0
                       for proc in processes):
                    break

            if not logfiles:
                continue
            if _has_error_in_logs(logfiles, computes, ssh_user, ssh_key,
                                  stop_on_error):
                had_errors = True
                break
    except KeyboardInterrupt:
        LOG.warning("Interrupted, going to print statistics and exit ...")

    if stop_on_error:
        # Restore the default disposition before the children are stopped.
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    terminate_all_processes()

    sum_fails = 0
    sum_runs = 0

    LOG.info("Statistics (per process):")
    for process in processes:
        if process['statistic']['fails'] > 0:
            had_errors = True
        sum_runs += process['statistic']['runs']
        sum_fails += process['statistic']['fails']
        print("Process %d (%s): Run %d actions (%d failed)" % (
            process['p_number'],
            process['action'],
            process['statistic']['runs'],
            process['statistic']['fails']))
    print("Summary:")
    print("Run %d actions (%d failed)" % (sum_runs, sum_fails))

    if not had_errors and CONF.stress.full_clean_stack:
        LOG.info("cleaning up")
        cleanup.cleanup()
    return 1 if had_errors else 0