blob: 71d02a99353b52329d850d7f89fe88a61cde7f24 [file] [log] [blame]
Sean Dague70112362012-04-03 13:48:49 -04001# Copyright 2011 Quanta Research Cambridge, Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
"""The entry point for the execution of a workload.

To execute a workload, users pass in a description of the workload and a
nova manager object to the bash_openstack function call.
"""
17
18
19import random
20import datetime
21import time
22
23
24# local imports
25from test_case import *
Sean Dague70112362012-04-03 13:48:49 -040026import utils.util
27from config import StressConfig
David Kranz779c7f82012-05-01 16:50:32 -040028from state import ClusterState, KeyPairState, FloatingIpState, VolumeState
29from tempest.common.utils.data_utils import rand_name
30
Sean Dague70112362012-04-03 13:48:49 -040031
# Root logger: record everything from DEBUG upwards in a log file that is
# truncated at the start of every run.
logging.basicConfig(
    filename="stress.debug.log",
    filemode="w",
    level=logging.DEBUG,
    format='%(asctime)s %(name)-20s %(levelname)-8s %(message)s',
    datefmt='%m-%d %H:%M:%S',
)

# Console handler: echo INFO-and-above records with a shorter layout.
# StreamHandler() constructed without a stream writes to sys.stderr.
_console = logging.StreamHandler()
_formatter = logging.Formatter('%(name)-20s: %(levelname)-8s %(message)s')
_console.setFormatter(_formatter)
_console.setLevel(logging.INFO)
# Attach to the root logger so every module's records reach the console.
logging.getLogger('').addHandler(_console)
50
51
52def _create_cases(choice_spec):
53 """
54 Generate a workload of tests from workload description
55 """
56 cases = []
57 count = 0
58 for choice in choice_spec:
59 p = choice.probability
60 for i in range(p):
61 cases.append(choice)
62 i = i + p
63 count = count + p
64 assert(count == 100)
65 return cases
66
67
68def _get_compute_nodes(keypath, user, controller):
69 """
70 Returns a list of active compute nodes. List is generated by running
71 nova-manage on the controller.
72 """
73 nodes = []
74 if keypath == None or user == None:
75 return nodes
76 lines = utils.util.ssh(keypath, user, controller,
77 "nova-manage service list | grep ^nova-compute").\
78 split('\n')
79 # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
80 # This is fragile but there is, at present, no other way to get this info.
81 for line in lines:
82 words = line.split()
83 if len(words) > 0 and words[4] == ":-)":
84 nodes.append(words[1])
85 return nodes
86
87
88def _error_in_logs(keypath, logdir, user, nodes):
89 """
90 Detect errors in the nova log files on the controller and compute nodes.
91 """
92 grep = 'egrep "ERROR\|TRACE" %s/*.log' % logdir
93 for node in nodes:
94 errors = utils.util.ssh(keypath, user, node, grep, check=False)
95 if len(errors) > 0:
96 logging.error('%s: %s' % (node, errors))
97 return True
98 return False
99
100
def create_initial_vms(manager, state, count):
    """
    Create `count` servers and record each one in `state` once it is ACTIVE.

    All create requests are issued before waiting on any of the servers.

    `manager` : Manager object supplying `config.compute.image_ref`,
                `config.compute.flavor_ref` and `servers_client`
    `state`   : cluster state; `set_instance_state(id, (server, 'ACTIVE'))`
                is called on it for every server
    `count`   : number of servers to create
    """
    image = manager.config.compute.image_ref
    flavor = manager.config.compute.flavor_ref
    logging.info('Creating %d vms', count)
    # Keep every generated name paired with its server so that the log line
    # in the second loop reports that server's own name rather than the
    # last name generated.
    servers = []
    for _ in range(count):
        name = rand_name('initial_vm-')
        _, server = manager.servers_client.create_server(name, image, flavor)
        servers.append((name, server))
    for name, server in servers:
        manager.servers_client.wait_for_server_status(server['id'], 'ACTIVE')
        logging.info('Server Name: %s Id: %s', name, server['id'])
        state.set_instance_state(server['id'], (server, 'ACTIVE'))
114
115
def create_initial_floating_ips(manager, state, count):
    """
    Allocate `count` floating ips and register each one in `state`.

    `manager` : Manager object supplying `floating_ips_client`
    `state`   : cluster state; `add_floating_ip` is called on it with a
                FloatingIpState wrapping every new ip
    `count`   : number of floating ips to create
    """
    logging.info('Creating %d floating ips', count)
    # range() rather than the Python 2-only xrange(); the counts involved
    # are small, so the behaviour is the same on either interpreter.
    for _ in range(count):
        _, ip = manager.floating_ips_client.create_floating_ip()
        logging.info('Ip: %s', ip['ip'])
        state.add_floating_ip(FloatingIpState(ip))
122
123
def create_initial_keypairs(manager, state, count):
    """
    Create `count` keypairs with random names and register them in `state`.

    `manager` : Manager object supplying `keypairs_client`
    `state`   : cluster state; `add_keypair` is called on it with a
                KeyPairState wrapping every new keypair
    `count`   : number of keypairs to create
    """
    logging.info('Creating %d keypairs', count)
    # range() rather than the Python 2-only xrange(); the counts involved
    # are small, so the behaviour is the same on either interpreter.
    for _ in range(count):
        name = rand_name('keypair-')
        _, keypair = manager.keypairs_client.create_keypair(name)
        logging.info('Keypair: %s', name)
        state.add_keypair(KeyPairState(keypair))
131
132
def create_initial_volumes(manager, state, count):
    """
    Create `count` volumes of size 1 and register each one in `state` once
    it is 'available'.

    All create requests are issued before waiting on any of the volumes.

    `manager` : Manager object supplying `volumes_client`
    `state`   : cluster state; `add_volume` is called on it with a
                VolumeState wrapping every new volume
    `count`   : number of volumes to create
    """
    logging.info('Creating %d volumes', count)
    # Keep every generated name paired with its volume so that the log line
    # in the second loop reports that volume's own name rather than the
    # last name generated.
    volumes = []
    for _ in range(count):
        name = rand_name('volume-')
        _, volume = manager.volumes_client.create_volume(size=1,
                                                         display_name=name)
        volumes.append((name, volume))
    for name, volume in volumes:
        manager.volumes_client.wait_for_volume_status(volume['id'],
                                                      'available')
        logging.info('Volume Name: %s Id: %s', name, volume['id'])
        state.add_volume(VolumeState(volume))
146
147
def _cleanup_cluster(manager, state, keypath, logdir, user, computes):
    """
    Delete every resource still tracked in `state`: servers first (waiting
    for each one to disappear), then floating ips, keypairs and volumes.

    Raises Exception("Cleanup timed out") when a server can still be
    retrieved after more than 60 polls, one second apart.
    """
    logging.info('Cleaning up: terminating virtual machines...')
    vms = state.get_instances()
    active_vms = [v for v in vms.values()
                  if v and v[1] != 'TERMINATING']
    for target in active_vms:
        manager.servers_client.delete_server(target[0]['id'])
    # check to see that the server was actually killed
    for target in active_vms:
        kill_id = target[0]['id']
        i = 0
        while True:
            try:
                manager.servers_client.get_server(kill_id)
            except Exception:
                # Any failure to fetch the server is taken to mean that it
                # is gone.
                break
            i += 1
            if i > 60:
                # Scan the node logs (which logs any matches) before
                # giving up.
                _error_in_logs(keypath, logdir, user, computes)
                raise Exception("Cleanup timed out")
            time.sleep(1)
        logging.info('killed %s', kill_id)
        state.delete_instance_state(kill_id)
    for floating_ip_state in state.get_floating_ips():
        manager.floating_ips_client.delete_floating_ip(
            floating_ip_state.resource_id)
    for keypair_state in state.get_keypairs():
        manager.keypairs_client.delete_keypair(keypair_state.name)
    for volume_state in state.get_volumes():
        manager.volumes_client.delete_volume(volume_state.resource_id)


def bash_openstack(manager,
                   choice_spec,
                   **kwargs):
    """
    Workload driver. Executes a workload as specified by the `choice_spec`
    parameter against a nova-cluster.

    `manager`      : Manager object
    `choice_spec`  : list of BasherChoice actions to run on the cluster
    `kwargs`       : keyword arguments controlling the run
        `duration`   = how long this test should last, as a
                       datetime.timedelta (default: 10 seconds)
        `sleep_time` = time to sleep between actions (in msec)
                       (default: 3000)
        `test_name`  = human readable workload description
                       (default: 'unamed test')
        `max_vms`    = maximum number of instances to launch
                       (default: `max_instances` from the stress config)
        `seed`       = random seed (default: None)
        `initial_keypairs`, `initial_vms`, `initial_floating_ips`,
        `initial_volumes`
                     = number of resources of each kind to create before
                       the workload starts (default: 0)

    Returns True if the run finished without ERROR/TRACE entries being
    found in the node logs, False otherwise.
    """
    stress_config = StressConfig(manager.config._conf)
    # get keyword arguments
    duration = kwargs.get('duration', datetime.timedelta(seconds=10))
    seed = kwargs.get('seed', None)
    sleep_time = float(kwargs.get('sleep_time', 3000)) / 1000
    max_vms = int(kwargs.get('max_vms', stress_config.max_instances))
    test_name = kwargs.get('test_name', 'unamed test')

    keypath = stress_config.host_private_key_path
    user = stress_config.host_admin_user
    logdir = stress_config.nova_logdir
    computes = _get_compute_nodes(keypath, user, manager.config.identity.host)
    # Start from empty logs so that only errors from this run are detected.
    utils.util.execute_on_all(keypath, user, computes,
                              "rm -f %s/*.log" % logdir)
    random.seed(seed)
    cases = _create_cases(choice_spec)
    state = ClusterState(max_vms=max_vms)
    create_initial_keypairs(manager, state,
                            int(kwargs.get('initial_keypairs', 0)))
    create_initial_vms(manager, state,
                       int(kwargs.get('initial_vms', 0)))
    create_initial_floating_ips(manager, state,
                                int(kwargs.get('initial_floating_ips', 0)))
    create_initial_volumes(manager, state,
                           int(kwargs.get('initial_volumes', 0)))
    # total_seconds() covers the whole timedelta; the `.seconds` attribute
    # alone leaves out the days component, which would cut short any run of
    # a day or longer.
    test_end_time = time.time() + duration.total_seconds()

    retry_list = []
    last_retry = time.time()
    cooldown = False
    logcheck_count = 0
    test_succeeded = True
    logging.debug('=== Test \"%s\" on %s ===',
                  test_name, time.asctime(time.localtime()))
    for kw in kwargs:
        logging.debug('\t%s = %s', kw, kwargs[kw])

    while True:
        if not cooldown:
            if time.time() < test_end_time:
                case = random.choice(cases)
                logging.debug('Chose %s', case)
                retry = case.invoke(manager, state)
                if retry is not None:
                    retry_list.append(retry)
            else:
                logging.info('Cooling down...')
                cooldown = True
        # Once the time is up and nothing is pending, do a final log check
        # and leave the loop.
        if cooldown and not retry_list:
            if _error_in_logs(keypath, logdir, user, computes):
                test_succeeded = False
            break
        # Retry verifications every 5 seconds.
        if time.time() - last_retry > 5:
            logging.debug('retry verifications for %d tasks', len(retry_list))
            new_retry_list = []
            for v in retry_list:
                v.check_timeout()
                # Keep only the entries whose retry() is still falsy.
                if not v.retry():
                    new_retry_list.append(v)
            retry_list = new_retry_list
            last_retry = time.time()
        time.sleep(sleep_time)
        # Check error logs after 100 actions
        if logcheck_count > 100:
            if _error_in_logs(keypath, logdir, user, computes):
                test_succeeded = False
                break
            else:
                logcheck_count = 0
        else:
            logcheck_count = logcheck_count + 1

    # Cleanup
    _cleanup_cluster(manager, state, keypath, logdir, user, computes)

    if test_succeeded:
        logging.info('*** Test succeeded ***')
    else:
        logging.info('*** Test had errors ***')
    return test_succeeded