# Copyright 2013 Quanta Research Cambridge, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

David Kranzb9d97502013-05-01 15:55:04 -040015import logging
16import multiprocessing
Marc Koderer3414d732013-07-31 08:36:36 +020017import signal
David Kranzb9d97502013-05-01 15:55:04 -040018import time
19
20from tempest import clients
21from tempest.common import ssh
22from tempest.common.utils.data_utils import rand_name
23from tempest import exceptions
Attila Fazekas1e30d5d2013-07-30 14:38:20 +020024from tempest.openstack.common import importutils
David Kranzb9d97502013-05-01 15:55:04 -040025from tempest.stress import cleanup
26
admin_manager = clients.AdminManager()

# Root logging configuration: everything from DEBUG upwards goes to
# stress.debug.log, which is opened in "w" mode (truncated on each run).
logging.basicConfig(
    level=logging.DEBUG,
    filename="stress.debug.log",
    filemode="w",
    datefmt='%m-%d %H:%M:%S',
    format='%(asctime)s %(process)d %(name)-20s %(levelname)-8s %(message)s',
)

# Console output uses its own format string.
format_str = '%(asctime)s %(process)d %(name)-20s: %(levelname)-8s %(message)s'
_formatter = logging.Formatter(format_str)

# Handler for INFO messages or higher. StreamHandler() without an explicit
# stream writes to sys.stderr.
_console = logging.StreamHandler()
_console.setFormatter(_formatter)
_console.setLevel(logging.INFO)

# The console handler is attached to the 'tempest.stress' logger (not to the
# root logger).
logger = logging.getLogger('tempest.stress')
logger.addHandler(_console)

# One dict per started worker process; filled by stress_openstack() and read
# by terminate_all_processes().
processes = []
David Kranzb9d97502013-05-01 15:55:04 -040050
51
def do_ssh(command, host):
    """
    Execute ``command`` on ``host`` through an SSH client.

    The SSH user name and private key path are read from the stress section
    of the admin manager's configuration.

    Returns the result of the client's exec_command() call, or None when
    either setting is empty or the call raises SSHExecCommandFailed.
    """
    stress_conf = admin_manager.config.stress
    ssh_user = stress_conf.target_ssh_user
    private_key = stress_conf.target_private_key_path
    if not ssh_user or not private_key:
        # SSH access is not configured.
        return None
    client = ssh.Client(host, ssh_user, key_filename=private_key)
    try:
        result = client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        result = None
    return result
62
63
def _get_compute_nodes(controller):
    """
    Returns a list of active compute nodes. List is generated by running
    nova-manage on the controller.

    A row of the command output counts as an active node when its fifth
    whitespace-separated column is ":-)"; the second column of that row is
    the value appended to the result. An empty list is returned when the
    command yields no output.
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller)
    if not output:
        return []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    nodes = []
    for line in output.split('\n'):
        words = line.split()
        # Index 4 is read below, so at least five columns are required.
        # Shorter rows (blank or truncated lines) are skipped instead of
        # raising IndexError.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
81
82
def _error_in_logs(logfiles, nodes):
    """
    Detect errors in the nova log files on the given nodes.

    Runs ``egrep "ERROR|TRACE"`` over ``logfiles`` on each node in turn via
    do_ssh(). The first non-empty result is logged at ERROR level and
    returned. Returns None when no node produces any output.
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    for node in nodes:
        errors = do_ssh(grep, node)
        if not errors:
            # Nothing reported by this node (do_ssh gives None when the
            # command fails or SSH is not configured). Keep checking the
            # remaining nodes rather than giving up after the first clean
            # one.
            continue
        logger.error('%s: %s', node, errors)
        return errors
    return None
96
97
def sigchld_handler(signal, frame):
    """
    SIGCHLD callback that terminates all child processes.

    stress_openstack() registers it only when stop_on_error is True.
    """
    # NOTE: inside this function the ``signal`` parameter shadows the
    # imported signal module; neither argument is used.
    terminate_all_processes()
103
104
def terminate_all_processes():
    """
    Walks the module-level process list, terminates every child that is
    still alive and joins each one.
    """
    for entry in processes:
        child = entry['process']
        if child.is_alive():
            try:
                child.terminate()
            except Exception:
                # Best effort: a failure to terminate one child must not
                # prevent the remaining ones from being handled.
                pass
        child.join()
116
117
def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
    """
    Workload driver. Executes an action function against a nova-cluster.

    One worker process is started per requested thread of every test. The
    driver then polls until a stop condition is met, terminates the workers,
    logs per-process and summed statistics and, if no error was seen, calls
    cleanup.cleanup().

    :param tests: iterable of test description dicts. Keys read here:
        'action' (passed to importutils.import_class) and the optional
        'threads' (default 1), 'use_admin' and 'use_isolated_tenants'
        (both default False) and 'kwargs' (keyword arguments for the test
        object's setUp(), default {}).
    :param duration: run time in seconds; only enforced when max_runs is
        None.
    :param max_runs: handed to every test object. When it is not None the
        driver waits until no worker process is alive instead of using
        ``duration``.
    :param stop_on_error: handed to every test object. When True, a SIGCHLD
        handler that terminates all workers is installed and the polling
        loop ends as soon as any worker reports a failed run.
    :returns: 1 if an error was found in the log files or any worker
        recorded a failure, otherwise 0.
    """
    logfiles = admin_manager.config.stress.target_logfiles
    log_check_interval = int(admin_manager.config.stress.log_check_interval)
    if logfiles:
        controller = admin_manager.config.stress.target_controller
        computes = _get_compute_nodes(controller)
        # Remove the existing log files on every compute node before the
        # run starts (presumably so that stale errors are not picked up).
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node)
    for test in tests:
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            manager = clients.Manager()
        for p_number in range(test.get('threads', 1)):
            if test.get('use_isolated_tenants', False):
                # Each worker gets a freshly created tenant and user.
                username = rand_name("stress_user")
                tenant_name = rand_name("stress_tenant")
                password = "pass"
                identity_client = admin_manager.identity_client
                _, tenant = identity_client.create_tenant(name=tenant_name)
                identity_client.create_user(username,
                                            password,
                                            tenant['id'],
                                            "email")
                manager = clients.Manager(username=username,
                                          password=password,
                                          tenant_name=tenant_name)

            test_obj = importutils.import_class(test['action'])
            test_run = test_obj(manager, logger, max_runs, stop_on_error)

            kwargs = test.get('kwargs', {})
            test_run.setUp(**kwargs)

            logger.debug("calling Target Object %s" %
                         test_run.__class__.__name__)

            # The run/fail counters live in a manager-backed dict so that
            # the parent can read what the worker process writes.
            mp_manager = multiprocessing.Manager()
            shared_statistic = mp_manager.dict()
            shared_statistic['runs'] = 0
            shared_statistic['fails'] = 0

            p = multiprocessing.Process(target=test_run.execute,
                                        args=(shared_statistic,))

            process = {'process': p,
                       'p_number': p_number,
                       'action': test['action'],
                       'statistic': shared_statistic}

            processes.append(process)
            p.start()
    if stop_on_error:
        # NOTE(mkoderer): only the parent should register the handler
        signal.signal(signal.SIGCHLD, sigchld_handler)
    end_time = time.time() + duration
    had_errors = False
    while True:
        if max_runs is None:
            # Time-boxed run: stop once ``duration`` has elapsed.
            remaining = end_time - time.time()
            if remaining <= 0:
                break
        else:
            # Run-count-boxed run: stop once every worker has exited.
            remaining = log_check_interval
            if not any(proc['process'].is_alive() for proc in processes):
                break

        time.sleep(min(remaining, log_check_interval))
        # Leave the polling loop itself as soon as any worker has recorded
        # a failure; a ``break`` inside a nested ``for`` would only end the
        # scan over the workers and let the run continue.
        if stop_on_error and any(proc['statistic']['fails'] > 0
                                 for proc in processes):
            break

        if not logfiles:
            continue
        errors = _error_in_logs(logfiles, computes)
        if errors:
            had_errors = True
            break

    terminate_all_processes()

    sum_fails = 0
    sum_runs = 0

    logger.info("Statistics (per process):")
    for process in processes:
        if process['statistic']['fails'] > 0:
            had_errors = True
        sum_runs += process['statistic']['runs']
        sum_fails += process['statistic']['fails']
        logger.info(" Process %d (%s): Run %d actions (%d failed)" %
                    (process['p_number'],
                     process['action'],
                     process['statistic']['runs'],
                     process['statistic']['fails']))
    logger.info("Summary:")
    logger.info("Run %d actions (%d failed)" %
                (sum_runs, sum_fails))

    if had_errors:
        # Cleanup is skipped when errors occurred.
        return 1
    logger.info("cleaning up")
    cleanup.cleanup(logger)
    return 0