blob: 62e60d61da1cab99c12f6e2a238e0e571c238076 [file] [log] [blame]
David Kranzb9d97502013-05-01 15:55:04 -04001# Copyright 2013 Quanta Research Cambridge, Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
David Kranzb9d97502013-05-01 15:55:04 -040015import multiprocessing
Marc Koderer8f940ab2013-09-25 17:31:50 +020016import os
Marc Koderer3414d732013-07-31 08:36:36 +020017import signal
David Kranzb9d97502013-05-01 15:55:04 -040018import time
19
Doug Hellmann583ce2c2015-03-11 14:55:46 +000020from oslo_log import log as logging
21from oslo_utils import importutils
llg821243b20502014-02-22 10:32:49 +080022from six import moves
Matthew Treinish01472ff2015-02-20 17:26:52 -050023from tempest_lib.common.utils import data_utils
llg821243b20502014-02-22 10:32:49 +080024
David Kranzb9d97502013-05-01 15:55:04 -040025from tempest import clients
Andrea Frittolif2f7a372015-03-04 15:07:39 +000026from tempest.common import isolated_creds
David Kranzb9d97502013-05-01 15:55:04 -040027from tempest.common import ssh
Matthew Treinish88f49ef2014-01-29 18:36:27 +000028from tempest import config
David Kranzb9d97502013-05-01 15:55:04 -040029from tempest import exceptions
30from tempest.stress import cleanup
31
# Shared tempest configuration object; the stress settings used in this
# module are read from CONF.stress.
CONF = config.CONF

# Module-level logger.
LOG = logging.getLogger(__name__)
# Bookkeeping for the worker processes started by stress_openstack(). Each
# entry is a dict with the keys 'process', 'p_number', 'action' and
# 'statistic'. It is a module global so that the SIGCHLD handler and
# terminate_all_processes() can reach it.
processes = []
David Kranzb9d97502013-05-01 15:55:04 -040036
37
def do_ssh(command, host, ssh_user, ssh_key=None):
    """
    Executes a command on a remote host via SSH.

    :param command: command line to execute on the remote host
    :param host: host to connect to
    :param ssh_user: user name used for the SSH connection
    :param ssh_key: optional key file handed to the SSH client
    :returns: the result of the SSH client's exec_command() call, or None
              if the call raised SSHExecCommandFailed (which is logged)
    """
    client = ssh.Client(host, ssh_user, key_filename=ssh_key)
    try:
        result = client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        message = ('do_ssh raise exception. command:%s, host:%s.'
                   % (command, host))
        LOG.error(message)
        result = None
    return result
46
47
def _get_compute_nodes(controller, ssh_user, ssh_key=None):
    """
    Returns a list of active compute nodes. List is generated by running
    nova-manage on the controller.

    :param controller: host on which nova-manage is executed
    :param ssh_user: user name used for the SSH connection
    :param ssh_key: optional key file used for the SSH connection
    :returns: list with the second column (host name) of every output line
              whose fifth column is the ":-)" state marker; an empty list
              if the command produced no output or failed
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller, ssh_user, ssh_key)
    if not output:
        return []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    nodes = []
    for line in output.split('\n'):
        words = line.split()
        # The state marker lives in the fifth column, so shorter lines
        # (blank or truncated output) must be skipped instead of indexed.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
65
66
def _has_error_in_logs(logfiles, nodes, ssh_user, ssh_key=None,
                       stop_on_error=False):
    """
    Detect errors in the nova log files on the controller and compute nodes.

    :param logfiles: log file path(s) appended to the egrep command line
    :param nodes: hosts on which the log files are searched
    :param ssh_user: user name used for the SSH connection
    :param ssh_key: optional key file used for the SSH connection
    :param stop_on_error: if True, stop after the first node that reports
                          matching lines instead of checking all nodes
    :returns: True if at least one node returned matching lines, else False
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    ret = False
    for node in nodes:
        errors = do_ssh(grep, node, ssh_user, ssh_key)
        # do_ssh() returns None when the remote command fails (presumably
        # this includes egrep finding no match -- TODO confirm), so test
        # truthiness instead of calling len() on a possible None.
        if errors:
            LOG.error('%s: %s' % (node, errors))
            ret = True
            if stop_on_error:
                break
    return ret
David Kranzb9d97502013-05-01 15:55:04 -040082
83
def sigchld_handler(signalnum, frame):
    """
    Signal handler (only active if stop_on_error is True).

    Looks for the first child that is no longer alive and has a non-zero
    exit code. If one is found, the default disposition for the received
    signal is restored and all children are terminated.
    """
    for entry in processes:
        child = entry['process']
        # Children that are still running or exited cleanly are fine.
        if child.is_alive() or child.exitcode == 0:
            continue
        signal.signal(signalnum, signal.SIG_DFL)
        terminate_all_processes()
        return
Marc Koderer3414d732013-07-31 08:36:36 +020094
95
def terminate_all_processes(check_interval=20):
    """
    Goes through the process list and terminates all child processes.

    Every child that is still alive is first asked to stop via terminate().
    The children then get at most check_interval seconds (in total) to
    exit; any child that is still alive afterwards is sent SIGKILL. Every
    child is joined before the function returns.

    :param check_interval: upper bound, in seconds, of the grace period
                           between terminate() and SIGKILL
    """
    LOG.info("Stopping all processes.")
    for process in processes:
        if process['process'].is_alive():
            try:
                process['process'].terminate()
            except Exception:
                # Best effort: keep going with the remaining children, but
                # leave a trace instead of silently dropping the error.
                LOG.debug("Failed to terminate process %s",
                          process['process'].pid, exc_info=True)
    # Wait for the children to exit, but never longer than check_interval
    # overall. Returning early avoids sleeping the full grace period when
    # every child has already stopped.
    deadline = time.time() + check_interval
    for process in processes:
        process['process'].join(max(0, deadline - time.time()))
    for process in processes:
        if process['process'].is_alive():
            try:
                pid = process['process'].pid
                LOG.warning("Process %d hangs. Send SIGKILL." % pid)
                os.kill(pid, signal.SIGKILL)
            except Exception:
                LOG.debug("Failed to kill process %s",
                          process['process'].pid, exc_info=True)
        process['process'].join()
117
118
def _create_isolated_manager(admin_manager):
    """
    Creates a new project and user and returns a client manager for them.

    The identity client is picked from admin_manager according to
    CONF.identity.auth_version, and every role listed in
    CONF.auth.tempest_roles is assigned to the new user.
    """
    username = data_utils.rand_name("stress_user")
    tenant_name = data_utils.rand_name("stress_tenant")
    password = "pass"
    if CONF.identity.auth_version == 'v2':
        identity_client = admin_manager.identity_client
    else:
        identity_client = admin_manager.identity_v3_client
    credentials_client = isolated_creds.get_creds_client(identity_client)
    project = credentials_client.create_project(
        name=tenant_name, description=tenant_name)
    user = credentials_client.create_user(username, password,
                                          project['id'], "email")
    # Add roles specified in config file
    for conf_role in CONF.auth.tempest_roles:
        credentials_client.assign_user_role(user, project, conf_role)
    creds = credentials_client.get_credentials(user, project, password)
    return clients.Manager(credentials=creds)


def _log_statistics():
    """
    Logs the run/fail counters of every worker process and a summary.

    :returns: True if at least one worker recorded a failure, else False
    """
    any_failed = False
    sum_fails = 0
    sum_runs = 0

    LOG.info("Statistics (per process):")
    for process in processes:
        runs = process['statistic']['runs']
        fails = process['statistic']['fails']
        if fails > 0:
            any_failed = True
        sum_runs += runs
        sum_fails += fails
        LOG.info(" Process %d (%s): Run %d actions (%d failed)" %
                 (process['p_number'], process['action'], runs, fails))
    LOG.info("Summary:")
    LOG.info("Run %d actions (%d failed)" %
             (sum_runs, sum_fails))
    return any_failed


def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
    """
    Workload driver. Executes an action function against a nova-cluster.

    For every test one worker process per configured thread is started.
    The driver then waits, periodically checking the worker statistics and
    (if CONF.stress.target_logfiles is set) the remote log files, and
    finally terminates all workers and logs the statistics.

    :param tests: list of dicts describing the actions. 'action' (import
                  path of the action class) is required; 'required_services',
                  'use_admin', 'threads', 'use_isolated_tenants' and
                  'kwargs' are optional.
    :param duration: run time in seconds; only used when max_runs is None
    :param max_runs: handed to every action object. When set, the driver
                     waits until all workers have terminated instead of
                     waiting for duration to elapse.
    :param stop_on_error: handed to every action object. When True, a
                          SIGCHLD handler is installed and the driver stops
                          as soon as a failure is detected.
    :returns: 1 if errors were found in the logs or any worker recorded a
              failure, otherwise 0
    """
    admin_manager = clients.AdminManager()

    ssh_user = CONF.stress.target_ssh_user
    ssh_key = CONF.stress.target_private_key_path
    logfiles = CONF.stress.target_logfiles
    log_check_interval = int(CONF.stress.log_check_interval)
    default_thread_num = int(CONF.stress.default_thread_number_per_action)
    # Always bound, so later use never depends on the logfiles branch.
    computes = []
    if logfiles:
        controller = CONF.stress.target_controller
        computes = _get_compute_nodes(controller, ssh_user, ssh_key)
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node, ssh_user, ssh_key)
    for test in tests:
        # NOTE(review): the first test with an unavailable required service
        # stops the scheduling of ALL remaining tests, not just this one.
        # Kept as-is; confirm whether skipping only this test was intended.
        required_services = test.get('required_services', [])
        if not all(CONF.service_available.get(service)
                   for service in required_services):
            break
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            manager = clients.Manager()
        for p_number in moves.xrange(test.get('threads', default_thread_num)):
            if test.get('use_isolated_tenants', False):
                manager = _create_isolated_manager(admin_manager)

            test_obj = importutils.import_class(test['action'])
            test_run = test_obj(manager, max_runs, stop_on_error)

            # Plain ** unpacking works on Python 2 and 3, unlike iteritems().
            kwargs = test.get('kwargs', {})
            test_run.setUp(**kwargs)

            LOG.debug("calling Target Object %s" %
                      test_run.__class__.__name__)

            mp_manager = multiprocessing.Manager()
            shared_statistic = mp_manager.dict()
            shared_statistic['runs'] = 0
            shared_statistic['fails'] = 0

            p = multiprocessing.Process(target=test_run.execute,
                                        args=(shared_statistic,))

            process = {'process': p,
                       'p_number': p_number,
                       'action': test_run.action,
                       'statistic': shared_statistic}

            processes.append(process)
            p.start()
    if stop_on_error:
        # NOTE(mkoderer): only the parent should register the handler
        signal.signal(signal.SIGCHLD, sigchld_handler)
    end_time = time.time() + duration
    had_errors = False
    try:
        while True:
            if max_runs is None:
                # Time-bound run: stop once duration has elapsed.
                remaining = end_time - time.time()
                if remaining <= 0:
                    break
            else:
                # Run-bound run: stop once every worker has finished.
                remaining = log_check_interval
                if not any(process['process'].is_alive()
                           for process in processes):
                    break

            time.sleep(min(remaining, log_check_interval))
            if stop_on_error:
                if any(proc['statistic']['fails'] > 0
                       for proc in processes):
                    break

            if not logfiles:
                continue
            if _has_error_in_logs(logfiles, computes, ssh_user, ssh_key,
                                  stop_on_error):
                had_errors = True
                break
    except KeyboardInterrupt:
        LOG.warning("Interrupted, going to print statistics and exit ...")

    if stop_on_error:
        # Restore the default disposition before the children are stopped,
        # so their exit no longer triggers sigchld_handler.
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    terminate_all_processes()

    if _log_statistics():
        had_errors = True

    if not had_errors and CONF.stress.full_clean_stack:
        LOG.info("cleaning up")
        cleanup.cleanup()
    return 1 if had_errors else 0