Sean Dague | 7011236 | 2012-04-03 13:48:49 -0400 | [diff] [blame] | 1 | # Copyright 2011 Quanta Research Cambridge, Inc. |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | # you may not use this file except in compliance with the License. |
| 5 | # You may obtain a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | # See the License for the specific language governing permissions and |
| 13 | # limitations under the License. |
| 14 | """The entry point for the execution of a workload. |
| 15 | To execute a workload, users pass in a description of the workload and a |
| 16 | nova manager object to the bash_openstack function call.""" |
| 17 | |
Sean Dague | 7011236 | 2012-04-03 13:48:49 -0400 | [diff] [blame] | 18 | import datetime |
Matthew Treinish | 8d6836b | 2012-12-10 10:07:56 -0500 | [diff] [blame] | 19 | import random |
Sean Dague | 7011236 | 2012-04-03 13:48:49 -0400 | [diff] [blame] | 20 | import time |
Matthew Treinish | 615ea6a | 2013-02-25 17:26:59 -0500 | [diff] [blame^] | 21 | from urlparse import urlparse |
Sean Dague | 7011236 | 2012-04-03 13:48:49 -0400 | [diff] [blame] | 22 | |
Sean Dague | 7011236 | 2012-04-03 13:48:49 -0400 | [diff] [blame] | 23 | from config import StressConfig |
Matthew Treinish | 8d6836b | 2012-12-10 10:07:56 -0500 | [diff] [blame] | 24 | from state import ClusterState |
| 25 | from state import FloatingIpState |
| 26 | from state import KeyPairState |
| 27 | from state import VolumeState |
Attila Fazekas | 73c152d | 2013-02-16 16:41:03 +0100 | [diff] [blame] | 28 | import stress.utils |
| 29 | from test_case import logging |
| 30 | |
David Kranz | 779c7f8 | 2012-05-01 16:50:32 -0400 | [diff] [blame] | 31 | from tempest.common.utils.data_utils import rand_name |
Sean Dague | 7011236 | 2012-04-03 13:48:49 -0400 | [diff] [blame] | 32 | |
# Send everything at DEBUG level and above to a debug log file. The file is
# opened with mode "w", so it is overwritten on every run.
logging.basicConfig(
    level=logging.DEBUG,
    filename="stress.debug.log",
    filemode="w",
    datefmt='%m-%d %H:%M:%S',
    format='%(asctime)s %(name)-20s %(levelname)-8s %(message)s',
)

# Messages at INFO level and above are additionally echoed to the console
# through a stream handler (created without an explicit stream, so the
# standard logging module falls back to sys.stderr), using a shorter format
# that leaves out the timestamp.
_formatter = logging.Formatter('%(name)-20s: %(levelname)-8s %(message)s')
_console = logging.StreamHandler()
_console.setFormatter(_formatter)
_console.setLevel(logging.INFO)
# Attach the console handler to the root logger so every logger inherits it.
logging.getLogger('').addHandler(_console)
| 51 | |
| 52 | |
def _create_cases(choice_spec):
    """
    Generate a workload of tests from workload description

    Every choice in `choice_spec` is repeated `choice.probability` times in
    the returned list, so a uniform random pick from that list selects each
    choice with its configured percentage.

    `choice_spec` : iterable of objects exposing an integer `probability`
                    attribute; the probabilities must add up to 100

    Returns the expanded list of choices, in `choice_spec` order.
    Raises AssertionError if the probabilities do not add up to 100.
    """
    cases = []
    total = 0
    for choice in choice_spec:
        weight = choice.probability
        cases.extend([choice] * weight)
        total += weight
    # Raised explicitly instead of using an `assert` statement so the check
    # is not stripped under `python -O`; the exception type stays the same.
    if total != 100:
        raise AssertionError(
            'choice probabilities must add up to 100, got %s' % total)
    return cases
| 67 | |
| 68 | |
def _get_compute_nodes(keypath, user, controller):
    """
    Returns a list of active compute nodes. List is generated by running
    nova-manage on the controller.

    `keypath`    : key path forwarded to stress.utils.ssh
    `user`       : user name forwarded to stress.utils.ssh
    `controller` : host on which the nova-manage command is run

    Returns an empty list, without contacting the controller, when either
    `keypath` or `user` is None.
    """
    if keypath is None or user is None:
        return []
    cmd = "nova-manage service list | grep ^nova-compute"
    output = stress.utils.ssh(keypath, user, controller, cmd)
    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
    # This is fragile but there is, at present, no other way to get this info.
    nodes = []
    for line in output.split('\n'):
        words = line.split()
        # The state marker is the fifth column, so require at least five
        # columns; blank or truncated lines are skipped instead of raising
        # IndexError.
        if len(words) > 4 and words[4] == ":-)":
            nodes.append(words[1])
    return nodes
| 86 | |
| 87 | |
def _error_in_logs(keypath, logdir, user, nodes):
    """
    Detect errors in the nova log files on the given nodes.

    Runs egrep for ERROR or TRACE over `logdir`/*.log on each node through
    stress.utils.ssh, stopping at the first node that produces output.

    `keypath` : key path forwarded to stress.utils.ssh
    `logdir`  : directory containing the *.log files to scan
    `user`    : user name forwarded to stress.utils.ssh
    `nodes`   : iterable of hosts to check

    Returns True (after logging the matching output) as soon as one node
    reports matches, False if no node does.
    """
    # egrep uses extended regular expressions, where a bare `|` is the
    # alternation operator; a backslash-escaped pipe would only match a
    # literal "|" character and miss lines containing just ERROR or TRACE.
    grep = 'egrep "ERROR|TRACE" %s/*.log' % logdir
    for node in nodes:
        # check=False: the command is allowed to fail (presumably because
        # egrep exits non-zero when nothing matches -- confirm in
        # stress.utils.ssh).
        errors = stress.utils.ssh(keypath, user, node, grep, check=False)
        if errors:
            logging.error('%s: %s', node, errors)
            return True
    return False
| 99 | |
| 100 | |
def create_initial_vms(manager, state, count):
    """
    Boot `count` servers and record each one in `state` once it is ACTIVE.

    All create requests are issued first; the servers are then waited on
    one by one, so they can build concurrently.

    `manager` : Manager object providing `config` and `servers_client`
    `state`   : cluster state object updated via `set_instance_state`
    `count`   : number of servers to create
    """
    image = manager.config.compute.image_ref
    flavor = manager.config.compute.flavor_ref
    logging.info('Creating %d vms' % count)
    # Keep every generated name paired with its server so that the log line
    # written after the wait reports the right name for each server instead
    # of the last name generated.
    named_servers = []
    for _ in range(count):
        name = rand_name('initial_vm-')
        _, server = manager.servers_client.create_server(name, image, flavor)
        named_servers.append((name, server))
    for name, server in named_servers:
        manager.servers_client.wait_for_server_status(server['id'], 'ACTIVE')
        logging.info('Server Name: %s Id: %s' % (name, server['id']))
        state.set_instance_state(server['id'], (server, 'ACTIVE'))
| 114 | |
| 115 | |
def create_initial_floating_ips(manager, state, count):
    """
    Create `count` floating ips through `manager.floating_ips_client` and
    add each of them to `state` wrapped in a FloatingIpState.
    """
    logging.info('Creating %d floating ips' % count)
    for _index in range(count):
        _resp, floating_ip = manager.floating_ips_client.create_floating_ip()
        address = floating_ip['ip']
        logging.info('Ip: %s' % address)
        state.add_floating_ip(FloatingIpState(floating_ip))
| 122 | |
| 123 | |
def create_initial_keypairs(manager, state, count):
    """
    Create `count` randomly named keypairs through
    `manager.keypairs_client` and add each of them to `state` wrapped in a
    KeyPairState.
    """
    logging.info('Creating %d keypairs' % count)
    for _index in range(count):
        keypair_name = rand_name('keypair-')
        _resp, created = manager.keypairs_client.create_keypair(keypair_name)
        logging.info('Keypair: %s' % keypair_name)
        state.add_keypair(KeyPairState(created))
| 131 | |
| 132 | |
def create_initial_volumes(manager, state, count):
    """
    Create `count` volumes of size 1 and record each one in `state` once it
    is available.

    All create requests are issued first; the volumes are then waited on
    one by one.

    `manager` : Manager object providing `volumes_client`
    `state`   : cluster state object updated via `add_volume`
    `count`   : number of volumes to create
    """
    logging.info('Creating %d volumes' % count)
    # Keep every generated name paired with its volume so that the log line
    # written after the wait reports the right name for each volume instead
    # of the last name generated.
    named_volumes = []
    for _ in range(count):
        name = rand_name('volume-')
        _, volume = manager.volumes_client.create_volume(size=1,
                                                         display_name=name)
        named_volumes.append((name, volume))
    for name, volume in named_volumes:
        manager.volumes_client.wait_for_volume_status(volume['id'],
                                                      'available')
        logging.info('Volume Name: %s Id: %s' % (name, volume['id']))
        state.add_volume(VolumeState(volume))
| 146 | |
| 147 | |
def _retry_verifications(retry_list):
    """Re-run each verification; return those whose retry() was false."""
    still_pending = []
    for verification in retry_list:
        verification.check_timeout()
        if not verification.retry():
            still_pending.append(verification)
    return still_pending


def _cleanup_cluster(manager, state, keypath, logdir, user, computes):
    """Delete the servers, floating ips, keypairs and volumes in `state`."""
    logging.info('Cleaning up: terminating virtual machines...')
    vms = state.get_instances()
    # Snapshot the instances to delete before the state is modified below.
    active_vms = [v for v in vms.values()
                  if v and v[1] != 'TERMINATING']
    for target in active_vms:
        manager.servers_client.delete_server(target[0]['id'])
    # check to see that the server was actually killed
    for target in active_vms:
        kill_id = target[0]['id']
        attempts = 0
        while True:
            try:
                manager.servers_client.get_server(kill_id)
            except Exception:
                # Any failure to fetch the server is treated as "it is gone".
                break
            attempts += 1
            if attempts > 60:
                # Log whatever the nodes reported before giving up.
                _error_in_logs(keypath, logdir, user, computes)
                raise Exception("Cleanup timed out")
            time.sleep(1)
        logging.info('killed %s', kill_id)
        state.delete_instance_state(kill_id)
    for floating_ip_state in state.get_floating_ips():
        manager.floating_ips_client.delete_floating_ip(
            floating_ip_state.resource_id)
    for keypair_state in state.get_keypairs():
        manager.keypairs_client.delete_keypair(keypair_state.name)
    for volume_state in state.get_volumes():
        manager.volumes_client.delete_volume(volume_state.resource_id)


def bash_openstack(manager,
                   choice_spec,
                   **kwargs):
    """
    Workload driver. Executes a workload as specified by the `choice_spec`
    parameter against a nova-cluster.

    `manager`     : Manager object
    `choice_spec` : list of BasherChoice actions to run on the cluster
    `kwargs`      : keyword arguments controlling the run
        `duration`   = datetime.timedelta saying how long the test should
                       last (default: 10 seconds)
        `sleep_time` = time to sleep between actions, in msec
                       (default: 3000)
        `test_name`  = human readable workload description
                       (default: 'unamed test')
        `max_vms`    = maximum number of instances to launch
                       (default: `max_instances` from the stress config)
        `seed`       = random seed (default: None)
        `initial_keypairs`, `initial_vms`, `initial_floating_ips`,
        `initial_volumes` = number of resources of each kind to create
                       before the workload starts (default: 0)

    Before the run, `<nova_logdir>/*.log` is removed on the compute nodes
    through stress.utils.execute_on_all. After the run, every server,
    floating ip, keypair and volume tracked in the cluster state is deleted.

    Returns True if no ERROR/TRACE was found in the logs, False otherwise.
    Raises Exception if a deleted server can still be fetched after more
    than 60 polls during cleanup.
    """
    stress_config = StressConfig(manager.config)
    # get keyword arguments
    duration = kwargs.get('duration', datetime.timedelta(seconds=10))
    seed = kwargs.get('seed', None)
    sleep_time = float(kwargs.get('sleep_time', 3000)) / 1000
    max_vms = int(kwargs.get('max_vms', stress_config.max_instances))
    test_name = kwargs.get('test_name', 'unamed test')

    keypath = stress_config.host_private_key_path
    user = stress_config.host_admin_user
    logdir = stress_config.nova_logdir
    host = urlparse(manager.config.identity.uri).hostname
    computes = _get_compute_nodes(keypath, user, host)
    stress.utils.execute_on_all(keypath, user, computes,
                                "rm -f %s/*.log" % logdir)
    random.seed(seed)
    cases = _create_cases(choice_spec)
    state = ClusterState(max_vms=max_vms)
    create_initial_keypairs(manager, state,
                            int(kwargs.get('initial_keypairs', 0)))
    create_initial_vms(manager, state,
                       int(kwargs.get('initial_vms', 0)))
    create_initial_floating_ips(manager, state,
                                int(kwargs.get('initial_floating_ips', 0)))
    create_initial_volumes(manager, state,
                           int(kwargs.get('initial_volumes', 0)))
    # total_seconds() covers the whole timedelta; the `seconds` attribute
    # alone drops the days component, so a one-day duration would count as 0.
    test_end_time = time.time() + duration.total_seconds()

    retry_list = []
    last_retry = time.time()
    cooldown = False
    logcheck_count = 0
    test_succeeded = True
    logging.debug('=== Test \"%s\" on %s ===',
                  test_name, time.asctime(time.localtime()))
    for kw in kwargs:
        logging.debug('\t%s = %s', kw, kwargs[kw])

    while True:
        if not cooldown:
            if time.time() < test_end_time:
                case = random.choice(cases)
                logging.debug('Chose %s', case)
                retry = case.invoke(manager, state)
                if retry is not None:
                    retry_list.append(retry)
            else:
                # Time is up: stop issuing actions and only drain the
                # outstanding verifications from now on.
                logging.info('Cooling down...')
                cooldown = True
        if cooldown and not retry_list:
            # Nothing left to verify: do a final log check and finish.
            if _error_in_logs(keypath, logdir, user, computes):
                test_succeeded = False
            break
        # Retry verifications every 5 seconds.
        if time.time() - last_retry > 5:
            logging.debug('retry verifications for %d tasks', len(retry_list))
            retry_list = _retry_verifications(retry_list)
            last_retry = time.time()
        time.sleep(sleep_time)
        # Check error logs after 100 actions
        if logcheck_count > 100:
            if _error_in_logs(keypath, logdir, user, computes):
                test_succeeded = False
                break
            logcheck_count = 0
        else:
            logcheck_count = logcheck_count + 1

    # Cleanup
    _cleanup_cluster(manager, state, keypath, logdir, user, computes)

    if test_succeeded:
        logging.info('*** Test succeeded ***')
    else:
        logging.info('*** Test had errors ***')
    return test_succeeded