blob: 3715636bf4dc7953e8b0676225be95f889664a04 [file] [log] [blame]
# Copyright 2013 Quanta Research Cambridge, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.
14
David Kranzb9d97502013-05-01 15:55:04 -040015import multiprocessing
Marc Koderer8f940ab2013-09-25 17:31:50 +020016import os
Marc Koderer3414d732013-07-31 08:36:36 +020017import signal
David Kranzb9d97502013-05-01 15:55:04 -040018import time
19
20from tempest import clients
21from tempest.common import ssh
Masayuki Igawa259c1132013-10-31 17:48:44 +090022from tempest.common.utils import data_utils
Matthew Treinish88f49ef2014-01-29 18:36:27 +000023from tempest import config
David Kranzb9d97502013-05-01 15:55:04 -040024from tempest import exceptions
Attila Fazekas1e30d5d2013-07-30 14:38:20 +020025from tempest.openstack.common import importutils
Marc Kodererb714de52013-08-08 09:21:46 +020026from tempest.openstack.common import log as logging
David Kranzb9d97502013-05-01 15:55:04 -040027from tempest.stress import cleanup
28
# Shared tempest configuration object; the stress settings used in this
# module are read from CONF.stress.
CONF = config.CONF

LOG = logging.getLogger(__name__)
# Registry of the worker processes started by stress_openstack. Each entry is
# a dict with the keys 'process', 'p_number', 'action' and 'statistic'.
processes = []
David Kranzb9d97502013-05-01 15:55:04 -040033
34
def do_ssh(command, host, ssh_user, ssh_key=None):
    """
    Runs a shell command on a remote host through the tempest ssh client.

    :param command: command line to execute remotely
    :param host: host to connect to
    :param ssh_user: user name used for the connection
    :param ssh_key: optional private key file handed to the ssh client
    :returns: whatever the ssh client's exec_command returns, or None if
              the command failed with SSHExecCommandFailed
    """
    client = ssh.Client(host, ssh_user, key_filename=ssh_key)
    try:
        result = client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        # A failing remote command is reported but deliberately not fatal.
        LOG.error('do_ssh raise exception. command:%s, host:%s.'
                  % (command, host))
        result = None
    return result
43
44
def _get_compute_nodes(controller, ssh_user, ssh_key=None):
    """
    Returns a list of active compute nodes. List is generated by running
    nova-manage on the controller.

    :param controller: host on which nova-manage is executed
    :param ssh_user: user name used for the ssh connection
    :param ssh_key: optional private key file for the ssh connection
    :returns: list of host names (second column of the nova-manage output)
              whose state column is ":-)"; empty if the command produced
              no output or failed
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller, ssh_user, ssh_key)
    if not output:
        return []
    nodes = []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    for line in output.split('\n'):
        words = line.split()
        # The state marker is the fifth column, so shorter (truncated or
        # unexpected) lines must be skipped instead of raising IndexError.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
62
63
def _has_error_in_logs(logfiles, nodes, ssh_user, ssh_key=None,
                       stop_on_error=False):
    """
    Detect errors in the nova log files on the controller and compute nodes.

    Runs egrep for "ERROR|TRACE" over ``logfiles`` on every node and logs
    any matching output.

    :param logfiles: log file path(s) interpolated into the egrep command
    :param nodes: hosts to inspect
    :param ssh_user: user name used for the ssh connections
    :param ssh_key: optional private key file for the ssh connections
    :param stop_on_error: if True, stop checking further nodes after the
                          first node that reports errors
    :returns: True if at least one node produced matching output
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    ret = False
    for node in nodes:
        errors = do_ssh(grep, node, ssh_user, ssh_key)
        # do_ssh returns None when the remote command fails, so test for
        # truthiness instead of calling len() on the result.
        # NOTE(review): egrep exits non-zero when nothing matches, which
        # presumably surfaces as that failure path - confirm against the
        # ssh client.
        if errors:
            LOG.error('%s: %s' % (node, errors))
            ret = True
            if stop_on_error:
                break
    return ret
David Kranzb9d97502013-05-01 15:55:04 -040079
80
def sigchld_handler(signal, frame):
    """
    SIGCHLD callback that shuts down all registered child processes.

    It is only registered when stop_on_error is True. Both arguments
    (signal number and current stack frame) are ignored; note that the
    first parameter shadows the ``signal`` module inside this function.
    """
    terminate_all_processes()
86
87
def terminate_all_processes(check_interval=20):
    """
    Goes through the process list and terminates all child processes.

    Every child that is still alive is asked to stop via ``terminate()``.
    The children then get at most ``check_interval`` seconds in total to
    exit; the wait ends early once all of them are gone. Any child that is
    still alive afterwards is sent SIGKILL. All children are joined before
    the function returns.

    :param check_interval: maximum grace period in seconds between the
                           terminate request and SIGKILL
    """
    for process in processes:
        if process['process'].is_alive():
            try:
                process['process'].terminate()
            except Exception as exc:
                # Best effort (e.g. the child exited in the meantime), but
                # leave a trace instead of hiding the problem completely.
                LOG.debug("Could not terminate process %s: %s"
                          % (process['process'].pid, exc))

    # Wait for the children against one shared deadline instead of always
    # sleeping for the full interval.
    deadline = time.time() + check_interval
    for process in processes:
        remaining = deadline - time.time()
        if remaining <= 0:
            break
        process['process'].join(remaining)

    for process in processes:
        if process['process'].is_alive():
            try:
                pid = process['process'].pid
                LOG.warn("Process %d hangs. Send SIGKILL." % pid)
                os.kill(pid, signal.SIGKILL)
            except Exception as exc:
                LOG.debug("Could not kill process %s: %s"
                          % (process['process'].pid, exc))
        process['process'].join()
108
109
def _any_process_failed():
    """Returns True if any registered worker has recorded a failed run."""
    return any(proc['statistic']['fails'] > 0 for proc in processes)


def _all_processes_terminated():
    """Returns True if none of the registered worker processes is alive."""
    return not any(proc['process'].is_alive() for proc in processes)


def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
    """
    Workload driver. Executes an action function against a nova-cluster.

    For every entry in ``tests`` the configured number of worker processes
    is started. The driver then loops, periodically checking the workers
    and (if configured) the remote log files, terminates all workers and
    logs per-process and summary statistics.

    :param tests: list of dicts describing the actions. Keys read here:
                  'action' (import path of the action class), 'threads'
                  (number of workers, defaults to the configured value),
                  'use_admin', 'use_isolated_tenants' and 'kwargs'
                  (keyword arguments passed to the action's setUp)
    :param duration: run time in seconds; only used when max_runs is None
    :param max_runs: passed to every action; when it is not None the
                     driver runs until all workers have exited
    :param stop_on_error: passed to every action; additionally stops the
                          driver as soon as a worker records a failure and
                          installs a SIGCHLD handler that terminates all
                          workers
    :returns: 1 if a worker failed or errors were found in the log files,
              otherwise 0
    """
    admin_manager = clients.AdminManager()

    ssh_user = CONF.stress.target_ssh_user
    ssh_key = CONF.stress.target_private_key_path
    logfiles = CONF.stress.target_logfiles
    log_check_interval = int(CONF.stress.log_check_interval)
    default_thread_num = int(CONF.stress.default_thread_number_per_action)
    # Always bound, so later use does not depend on the logfiles branch.
    computes = []
    if logfiles:
        controller = CONF.stress.target_controller
        computes = _get_compute_nodes(controller, ssh_user, ssh_key)
        # Start from empty logs so only errors of this run are detected.
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node, ssh_user, ssh_key)
    for test in tests:
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            manager = clients.Manager()
        for p_number in range(test.get('threads', default_thread_num)):
            if test.get('use_isolated_tenants', False):
                # Each worker gets its own freshly created tenant and user.
                username = data_utils.rand_name("stress_user")
                tenant_name = data_utils.rand_name("stress_tenant")
                password = "pass"
                identity_client = admin_manager.identity_client
                _, tenant = identity_client.create_tenant(name=tenant_name)
                identity_client.create_user(username,
                                            password,
                                            tenant['id'],
                                            "email")
                manager = clients.Manager(username=username,
                                          password=password,
                                          tenant_name=tenant_name)

            test_obj = importutils.import_class(test['action'])
            test_run = test_obj(manager, max_runs, stop_on_error)

            kwargs = test.get('kwargs', {})
            test_run.setUp(**dict(kwargs))

            LOG.debug("calling Target Object %s" %
                      test_run.__class__.__name__)

            # Run/fail counters shared between the worker and this driver.
            mp_manager = multiprocessing.Manager()
            shared_statistic = mp_manager.dict()
            shared_statistic['runs'] = 0
            shared_statistic['fails'] = 0

            p = multiprocessing.Process(target=test_run.execute,
                                        args=(shared_statistic,))

            process = {'process': p,
                       'p_number': p_number,
                       'action': test_run.action,
                       'statistic': shared_statistic}

            processes.append(process)
            p.start()
    if stop_on_error:
        # NOTE(mkoderer): only the parent should register the handler
        signal.signal(signal.SIGCHLD, sigchld_handler)
    end_time = time.time() + duration
    had_errors = False
    while True:
        if max_runs is None:
            remaining = end_time - time.time()
            if remaining <= 0:
                break
        else:
            remaining = log_check_interval
            if _all_processes_terminated():
                break

        time.sleep(min(remaining, log_check_interval))
        # Leave the driver loop itself (not just an inner scan) as soon as
        # any worker has recorded a failure.
        if stop_on_error and _any_process_failed():
            break

        if not logfiles:
            continue
        if _has_error_in_logs(logfiles, computes, ssh_user, ssh_key,
                              stop_on_error):
            had_errors = True
            break

    if stop_on_error:
        # The children are about to be terminated on purpose; restore the
        # default disposition so the handler is not re-entered for each
        # exiting child.
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
    terminate_all_processes()

    sum_fails = 0
    sum_runs = 0

    LOG.info("Statistics (per process):")
    for process in processes:
        if process['statistic']['fails'] > 0:
            had_errors = True
        sum_runs += process['statistic']['runs']
        sum_fails += process['statistic']['fails']
        LOG.info(" Process %d (%s): Run %d actions (%d failed)" %
                 (process['p_number'],
                  process['action'],
                  process['statistic']['runs'],
                  process['statistic']['fails']))
    LOG.info("Summary:")
    LOG.info("Run %d actions (%d failed)" %
             (sum_runs, sum_fails))

    if not had_errors and CONF.stress.full_clean_stack:
        LOG.info("cleaning up")
        cleanup.cleanup()
    if had_errors:
        return 1
    else:
        return 0
229 return 0