# Copyright 2013 Quanta Research Cambridge, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.
| 14 | |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 15 | import logging |
| 16 | import multiprocessing |
Marc Koderer | 3414d73 | 2013-07-31 08:36:36 +0200 | [diff] [blame] | 17 | import signal |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 18 | import time |
| 19 | |
| 20 | from tempest import clients |
| 21 | from tempest.common import ssh |
| 22 | from tempest.common.utils.data_utils import rand_name |
| 23 | from tempest import exceptions |
Attila Fazekas | 1e30d5d | 2013-07-30 14:38:20 +0200 | [diff] [blame] | 24 | from tempest.openstack.common import importutils |
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 25 | from tempest.stress import cleanup |
| 26 | |
# Admin client manager shared by the whole module; built once at import time.
admin_manager = clients.AdminManager()

# setup logging to file: everything from DEBUG upwards goes to
# stress.debug.log, which is truncated on every start (filemode="w")
logging.basicConfig(
    format='%(asctime)s %(process)d %(name)-20s %(levelname)-8s %(message)s',
    datefmt='%m-%d %H:%M:%S',
    filename="stress.debug.log",
    filemode="w",
    level=logging.DEBUG,
)

# define a Handler which writes INFO messages or higher to the console
# (a StreamHandler created without a stream argument writes to sys.stderr)
_console = logging.StreamHandler()
_console.setLevel(logging.INFO)
# set a format which is simpler for console use
format_str = '%(asctime)s %(process)d %(name)-20s: %(levelname)-8s %(message)s'
_formatter = logging.Formatter(format_str)
# tell the handler to use this format
_console.setFormatter(_formatter)
# add the handler to the 'tempest.stress' logger (not the root logger)
logger = logging.getLogger('tempest.stress')
logger.addHandler(_console)
# Registry of started worker processes. stress_openstack() appends one dict
# per worker (keys: 'process', 'p_number', 'action', 'statistic') and
# terminate_all_processes() iterates over it.
processes = []
David Kranz | b9d9750 | 2013-05-01 15:55:04 -0400 | [diff] [blame] | 50 | |
| 51 | |
def do_ssh(command, host):
    """
    Runs ``command`` on ``host`` over SSH with the configured stress
    credentials.

    Returns the output of the command, or None when either the ssh user or
    the private key path is not configured, or when the remote command
    raises SSHExecCommandFailed.
    """
    username = admin_manager.config.stress.target_ssh_user
    key_filename = admin_manager.config.stress.target_private_key_path
    # Without both a user and a key there is no way to log in; give up early.
    if not username or not key_filename:
        return None
    client = ssh.Client(host, username, key_filename=key_filename)
    try:
        result = client.exec_command(command)
    except exceptions.SSHExecCommandFailed:
        result = None
    return result
| 62 | |
| 63 | |
def _get_compute_nodes(controller):
    """
    Returns a list of active compute nodes. List is generated by running
    nova-manage on the controller.

    :param controller: host on which ``nova-manage service list`` is run.
    :returns: list of host names (second column) of the nova-compute
        services whose state column is ``:-)``; empty list when the
        command produced no output or could not be run.
    """
    cmd = "nova-manage service list | grep ^nova-compute"
    output = do_ssh(cmd, controller)
    if not output:
        return []
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    nodes = []
    for line in output.splitlines():
        words = line.split()
        # The state smiley is the fifth field, so a line needs at least five
        # fields; shorter (truncated or unexpected) lines are skipped instead
        # of raising IndexError.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
| 81 | |
| 82 | |
def _error_in_logs(logfiles, nodes):
    """
    Detect errors in the nova log files on the controller and compute nodes.

    :param logfiles: log file path(s) handed to egrep on each node.
    :param nodes: iterable of hosts to inspect.
    :returns: the egrep output of the first node that has ERROR or TRACE
        lines (also logged at error level), or None when no node has any.
    """
    grep = 'egrep "ERROR|TRACE" %s' % logfiles
    for node in nodes:
        errors = do_ssh(grep, node)
        if not errors:
            # do_ssh() gives None/empty when nothing was returned for this
            # node. Keep scanning the remaining nodes instead of stopping at
            # the first clean one.
            continue
        logger.error('%s: %s' % (node, errors))
        return errors
    return None
| 96 | |
| 97 | |
def sigchld_handler(signal, frame):
    """
    SIGCHLD callback that shuts down every worker process.

    stress_openstack() installs it only when stop_on_error is True. Neither
    argument is used; note that the ``signal`` parameter hides the ``signal``
    module inside this function.
    """
    terminate_all_processes()
| 103 | |
| 104 | |
def terminate_all_processes():
    """
    Stops and reaps every worker recorded in the module-level ``processes``
    list.
    """
    for entry in processes:
        worker = entry['process']
        if worker.is_alive():
            # Best effort: the worker may exit between the liveness check
            # and terminate(), so any failure here is deliberately ignored.
            try:
                worker.terminate()
            except Exception:
                pass
        # Always join so that the child is reaped.
        worker.join()
| 116 | |
| 117 | |
def stress_openstack(tests, duration, max_runs=None, stop_on_error=False):
    """
    Workload driver. Executes an action function against a nova-cluster.

    One worker process is started per requested thread of every test; the
    driver then polls the workers (and, if configured, the remote log files)
    until the run is over, terminates the workers and logs statistics.

    :param tests: iterable of dicts describing the tests. Keys read here:
        'action' (dotted path of the test class to import) and, optionally,
        'use_admin', 'threads' (default 1), 'use_isolated_tenants' and
        'kwargs' (keyword arguments for the test object's setUp()).
    :param duration: run time in seconds; only used when max_runs is None.
    :param max_runs: handed to every test object. When it is not None the
        driver ignores ``duration`` and waits until no worker is alive.
    :param stop_on_error: handed to every test object. When True a SIGCHLD
        handler that terminates all workers is installed, and the driver
        stops as soon as any worker reports a failed run.
    :returns: 1 if any worker failed or errors were found in the remote
        logs, 0 otherwise.
    """
    logfiles = admin_manager.config.stress.target_logfiles
    log_check_interval = int(admin_manager.config.stress.log_check_interval)
    # Always bound, even when no log files are configured.
    computes = []
    if logfiles:
        controller = admin_manager.config.stress.target_controller
        computes = _get_compute_nodes(controller)
        # Start from empty logs so only errors from this run are detected.
        for node in computes:
            do_ssh("rm -f %s" % logfiles, node)

    for test in tests:
        if test.get('use_admin', False):
            manager = admin_manager
        else:
            manager = clients.Manager()
        for p_number in range(test.get('threads', 1)):
            if test.get('use_isolated_tenants', False):
                # Give this worker its own freshly created tenant and user.
                username = rand_name("stress_user")
                tenant_name = rand_name("stress_tenant")
                password = "pass"
                identity_client = admin_manager.identity_client
                _, tenant = identity_client.create_tenant(name=tenant_name)
                identity_client.create_user(username,
                                            password,
                                            tenant['id'],
                                            "email")
                manager = clients.Manager(username=username,
                                          password=password,
                                          tenant_name=tenant_name)

            test_obj = importutils.import_class(test['action'])
            test_run = test_obj(manager, logger, max_runs, stop_on_error)

            # Pass a copy so setUp() cannot modify the test description.
            kwargs = test.get('kwargs', {})
            test_run.setUp(**dict(kwargs))

            logger.debug("calling Target Object %s" %
                         test_run.__class__.__name__)

            # Counters shared between the worker and this driver process.
            mp_manager = multiprocessing.Manager()
            shared_statistic = mp_manager.dict()
            shared_statistic['runs'] = 0
            shared_statistic['fails'] = 0

            p = multiprocessing.Process(target=test_run.execute,
                                        args=(shared_statistic,))

            process = {'process': p,
                       'p_number': p_number,
                       'action': test['action'],
                       'statistic': shared_statistic}

            processes.append(process)
            p.start()

    if stop_on_error:
        # NOTE(mkoderer): only the parent should register the handler
        signal.signal(signal.SIGCHLD, sigchld_handler)

    end_time = time.time() + duration
    had_errors = False
    while True:
        if max_runs is None:
            # Time-boxed run: stop once the duration has elapsed.
            remaining = end_time - time.time()
            if remaining <= 0:
                break
        else:
            # Run-count-boxed run: stop once every worker has exited.
            remaining = log_check_interval
            if not any(proc['process'].is_alive() for proc in processes):
                break

        time.sleep(min(remaining, log_check_interval))

        # Leave the monitoring loop itself (not just an inner scan) as soon
        # as any worker has recorded a failure.
        if stop_on_error and any(proc['statistic']['fails'] > 0
                                 for proc in processes):
            break

        if not logfiles:
            continue
        if _error_in_logs(logfiles, computes):
            had_errors = True
            break

    terminate_all_processes()

    sum_fails = 0
    sum_runs = 0

    logger.info("Statistics (per process):")
    for proc in processes:
        runs = proc['statistic']['runs']
        fails = proc['statistic']['fails']
        if fails > 0:
            had_errors = True
        sum_runs += runs
        sum_fails += fails
        logger.info(" Process %d (%s): Run %d actions (%d failed)" %
                    (proc['p_number'], proc['action'], runs, fails))
    logger.info("Summary:")
    logger.info("Run %d actions (%d failed)" %
                (sum_runs, sum_fails))

    # Resources are only cleaned up after a clean run.
    if not had_errors:
        logger.info("cleaning up")
        cleanup.cleanup(logger)
    return 1 if had_errors else 0