blob: 1e33e8856f74a50674905f9aa0b5bffce95f21dd [file] [log] [blame]
# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
David Kranzb9d97502013-05-01 15:55:04 -040015import multiprocessing
Marc Koderer8f940ab2013-09-25 17:31:50 +020016import os
Marc Koderer3414d732013-07-31 08:36:36 +020017import signal
David Kranzb9d97502013-05-01 15:55:04 -040018import time
19
Doug Hellmann583ce2c2015-03-11 14:55:46 +000020from oslo_log import log as logging
21from oslo_utils import importutils
Matthew Treinish71426682015-04-23 11:19:38 -040022import six
llg821243b20502014-02-22 10:32:49 +080023
David Kranzb9d97502013-05-01 15:55:04 -040024from tempest import clients
Jamie Lennox15350172015-08-17 10:54:25 +100025from tempest.common import cred_client
Andrea Frittoli (andreaf)290b3e12015-10-08 10:25:02 +010026from tempest.common import credentials_factory as credentials
Fei Long Wangd39431f2015-05-14 11:30:48 +120027from tempest.common.utils import data_utils
Matthew Treinish88f49ef2014-01-29 18:36:27 +000028from tempest import config
David Kranzb9d97502013-05-01 15:55:04 -040029from tempest import exceptions
Andrea Frittoli (andreaf)db9672e2016-02-23 14:07:24 -050030from tempest.lib.common import ssh
David Kranzb9d97502013-05-01 15:55:04 -040031from tempest.stress import cleanup
32
# Shared tempest configuration object; the functions in this module read
# their settings from its ``stress``, ``identity``, ``auth`` and
# ``service_available`` groups.
CONF = config.CONF

LOG = logging.getLogger(__name__)
# Module-level registry of spawned workers. stress_openstack() appends one
# dict per child with the keys 'process', 'p_number', 'action' and
# 'statistic'; sigchld_handler() and terminate_all_processes() iterate it.
processes = []
David Kranzb9d97502013-05-01 15:55:04 -040037
38
def do_ssh(command, host, ssh_user, ssh_key=None):
    """Run ``command`` on ``host`` over SSH and return what it produced.

    :param command: command line to execute on the remote host
    :param host: host to connect to
    :param ssh_user: user name used for the SSH connection
    :param ssh_key: optional private key file passed as ``key_filename``
    :returns: the result of ``exec_command``, or None when the command
              raised SSHExecCommandFailed (the failure is logged)
    """
    client = ssh.Client(host, ssh_user, key_filename=ssh_key)
    result = None
    try:
        result = client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        message = ('do_ssh raise exception. command:%s, host:%s.'
                   % (command, host))
        LOG.error(message)
    return result
47
48
def _get_compute_nodes(controller, ssh_user, ssh_key=None):
    """Returns a list of active compute nodes.

    List is generated by running nova-manage on the controller and keeping
    the host column of every nova-compute row whose state column is ":-)".

    :param controller: host on which nova-manage is executed
    :param ssh_user: user name used for the SSH connection
    :param ssh_key: optional private key file used for the SSH connection
    :returns: list of host names; empty when the command failed or
              produced no output
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller, ssh_user, ssh_key)
    if not output:
        return []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    nodes = []
    for line in output.split('\n'):
        words = line.split()
        # The state lives in the fifth column, so shorter rows (blank or
        # truncated lines) must be skipped rather than indexed.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
66
67
def _has_error_in_logs(logfiles, nodes, ssh_user, ssh_key=None,
                       stop_on_error=False):
    """Detect errors in nova log files on the controller and compute nodes.

    Runs ``egrep "ERROR|TRACE"`` over ``logfiles`` on every node and logs
    whatever matched.

    :param logfiles: file name(s)/pattern handed to egrep on each node
    :param nodes: iterable of hosts to inspect
    :param ssh_user: user name used for the SSH connection
    :param ssh_key: optional private key file used for the SSH connection
    :param stop_on_error: when True, stop at the first node with matches
    :returns: True if at least one node reported matching lines
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    found = False
    for node in nodes:
        errors = do_ssh(grep, node, ssh_user, ssh_key)
        # do_ssh returns None when the remote command fails, so test for
        # truthiness instead of calling len() on the result.
        if errors:
            LOG.error('%s: %s', node, errors)
            found = True
            if stop_on_error:
                break
    return found
David Kranzb9d97502013-05-01 15:55:04 -040081
82
def sigchld_handler(signalnum, frame):
    """Signal handler (only active if stop_on_error is True).

    If any registered process is no longer alive and exited with a non-zero
    exit code, the default disposition for ``signalnum`` is restored and
    all processes are terminated.
    """
    def _died_with_error(entry):
        worker = entry['process']
        return not worker.is_alive() and worker.exitcode != 0

    if any(_died_with_error(entry) for entry in processes):
        signal.signal(signalnum, signal.SIG_DFL)
        terminate_all_processes()
Marc Koderer3414d732013-07-31 08:36:36 +020091
92
def terminate_all_processes(check_interval=20):
    """Goes through the process list and terminates all child processes.

    Every live process is first asked to terminate. The processes then get
    up to ``check_interval`` seconds in total to exit; anything still alive
    after that is sent SIGKILL. Finally every process is joined.

    :param check_interval: maximum number of seconds to wait for the
                           processes to exit before sending SIGKILL
    """
    LOG.info("Stopping all processes.")
    for process in processes:
        if process['process'].is_alive():
            try:
                process['process'].terminate()
            except Exception:
                # Best effort: keep going so the remaining processes are
                # still stopped, but leave a trace of what went wrong.
                LOG.exception("Failed to terminate process %s",
                              process['process'].pid)

    # Wait for the processes to exit, but never longer than check_interval
    # overall, and return early when they have all gone away.
    deadline = time.time() + check_interval
    for process in processes:
        remaining = deadline - time.time()
        if remaining <= 0:
            break
        if process['process'].is_alive():
            process['process'].join(remaining)

    for process in processes:
        if process['process'].is_alive():
            try:
                pid = process['process'].pid
                LOG.warning("Process %d hangs. Send SIGKILL.", pid)
                os.kill(pid, signal.SIGKILL)
            except Exception:
                # Best effort as above: the join below still runs.
                LOG.exception("Failed to kill process %s",
                              process['process'].pid)
        process['process'].join()
112
113
def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
    """Workload driver. Executes an action function against a nova-cluster.

    For every test description one child process per requested thread is
    spawned, each running the ``execute`` method of the imported action
    class. The parent then polls until the run is over, stops all children,
    prints per-process statistics and optionally cleans up.

    :param tests: iterable of dicts describing the actions. The keys read
                  here are 'action' (import path of the action class),
                  'use_admin', 'threads', 'use_isolated_tenants', 'kwargs'
                  (passed to ``setUp``) and 'required_services'
    :param duration: run time in seconds; only consulted when ``max_runs``
                     is None
    :param max_runs: passed to each action object; when not None the driver
                     waits until all processes have terminated instead of
                     using ``duration``
    :param stop_on_error: passed to each action object; when True a SIGCHLD
                          handler is installed and polling stops as soon as
                          any process reports a failure
    :returns: 1 if errors were found in the logs or any process recorded a
              failure, otherwise 0
    :raises NotImplementedError: if a test is not marked with 'use_admin'
    """
    admin_manager = credentials.AdminManager()

    ssh_user = CONF.stress.target_ssh_user
    ssh_key = CONF.stress.target_private_key_path
    logfiles = CONF.stress.target_logfiles
    log_check_interval = int(CONF.stress.log_check_interval)
    default_thread_num = int(CONF.stress.default_thread_number_per_action)
    if logfiles:
        # Start from empty log files so only errors of this run are seen.
        controller = CONF.stress.target_controller
        computes = _get_compute_nodes(controller, ssh_user, ssh_key)
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node, ssh_user, ssh_key)
    skip = False
    for test in tests:
        for service in test.get('required_services', []):
            if not CONF.service_available.get(service):
                skip = True
                break
        if skip:
            # NOTE(review): this stops processing ALL remaining tests, not
            # just the one with the unavailable service - confirm that this
            # is intended rather than a per-test skip.
            break
        # TODO(andreaf) This has to be reworked to use the credential
        # provider interface. For now only tests marked as 'use_admin' will
        # work.
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            # NotImplemented is a comparison sentinel and is not callable;
            # NotImplementedError is the exception meant here.
            raise NotImplementedError('Non admin tests are not supported')
        for p_number in range(test.get('threads', default_thread_num)):
            if test.get('use_isolated_tenants', False):
                username = data_utils.rand_name("stress_user")
                tenant_name = data_utils.rand_name("stress_tenant")
                password = "pass"
                if CONF.identity.auth_version == 'v2':
                    identity_client = admin_manager.identity_client
                    projects_client = admin_manager.tenants_client
                    roles_client = admin_manager.roles_client
                    users_client = admin_manager.users_client
                    domains_client = None
                else:
                    identity_client = admin_manager.identity_v3_client
                    projects_client = admin_manager.projects_client
                    roles_client = admin_manager.roles_v3_client
                    users_client = admin_manager.users_v3_client
                    domains_client = admin_manager.domains_client
                domain = (identity_client.auth_provider.credentials.
                          get('project_domain_name', 'Default'))
                credentials_client = cred_client.get_creds_client(
                    identity_client, projects_client, users_client,
                    roles_client, domains_client, project_domain_name=domain)
                project = credentials_client.create_project(
                    name=tenant_name, description=tenant_name)
                user = credentials_client.create_user(username, password,
                                                      project, "email")
                # Add roles specified in config file
                for conf_role in CONF.auth.tempest_roles:
                    credentials_client.assign_user_role(user, project,
                                                        conf_role)
                creds = credentials_client.get_credentials(user, project,
                                                           password)
                manager = clients.Manager(credentials=creds)

            test_obj = importutils.import_class(test['action'])
            test_run = test_obj(manager, max_runs, stop_on_error)

            kwargs = test.get('kwargs', {})
            test_run.setUp(**dict(six.iteritems(kwargs)))

            LOG.debug("calling Target Object %s",
                      test_run.__class__.__name__)

            # The run/failure counters live in a manager dict so the parent
            # can read what the child process updates.
            mp_manager = multiprocessing.Manager()
            shared_statistic = mp_manager.dict()
            shared_statistic['runs'] = 0
            shared_statistic['fails'] = 0

            p = multiprocessing.Process(target=test_run.execute,
                                        args=(shared_statistic,))

            process = {'process': p,
                       'p_number': p_number,
                       'action': test_run.action,
                       'statistic': shared_statistic}

            processes.append(process)
            p.start()
    if stop_on_error:
        # NOTE(mkoderer): only the parent should register the handler
        signal.signal(signal.SIGCHLD, sigchld_handler)
    end_time = time.time() + duration
    had_errors = False
    try:
        while True:
            if max_runs is None:
                # Time-boxed run: stop once ``duration`` has elapsed.
                remaining = end_time - time.time()
                if remaining <= 0:
                    break
            else:
                # Run-count-boxed run: stop once every child has finished.
                remaining = log_check_interval
                if not any(process['process'].is_alive()
                           for process in processes):
                    break

            time.sleep(min(remaining, log_check_interval))
            if stop_on_error:
                if any(proc['statistic']['fails'] > 0
                       for proc in processes):
                    break

            if not logfiles:
                continue
            if _has_error_in_logs(logfiles, computes, ssh_user, ssh_key,
                                  stop_on_error):
                had_errors = True
                break
    except KeyboardInterrupt:
        LOG.warning("Interrupted, going to print statistics and exit ...")

    if stop_on_error:
        # Restore the default disposition before stopping the children so
        # their exits do not re-enter the handler.
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    terminate_all_processes()

    sum_fails = 0
    sum_runs = 0

    LOG.info("Statistics (per process):")
    for process in processes:
        if process['statistic']['fails'] > 0:
            had_errors = True
        sum_runs += process['statistic']['runs']
        sum_fails += process['statistic']['fails']
        print("Process %d (%s): Run %d actions (%d failed)" % (
            process['p_number'],
            process['action'],
            process['statistic']['runs'],
            process['statistic']['fails']))
    print("Summary:")
    print("Run %d actions (%d failed)" % (sum_runs, sum_fails))

    if not had_errors and CONF.stress.full_clean_stack:
        LOG.info("cleaning up")
        cleanup.cleanup()
    return 1 if had_errors else 0