Simplified stress tests.

Change-Id: I3cc6a14f32d81fa81adb13aed97049c9df2abbb2
diff --git a/tempest/config.py b/tempest/config.py
index a90767e..6da2ddc 100644
--- a/tempest/config.py
+++ b/tempest/config.py
@@ -411,7 +411,23 @@
                help='Maximum number of instances to create during test.'),
     cfg.StrOpt('controller',
                default=None,
-               help='Controller host.')
+               help='Controller host.'),
+    # new stress options
+    cfg.StrOpt('target_controller',
+               default=None,
+               help='Controller host.'),
+    cfg.StrOpt('target_ssh_user',
+               default=None,
+               help='ssh user.'),
+    cfg.StrOpt('target_private_key_path',
+               default=None,
+               help='Path to private key.'),
+    cfg.StrOpt('target_logfiles',
+               default=None,
+               help='regexp for list of log files.'),
+    cfg.IntOpt('log_check_interval',
+               default=60,
+               help='time between log file error checks.')
 ]
 
 
diff --git a/tempest/stress/README.rst b/tempest/stress/README.rst
new file mode 100644
index 0000000..2c431ed
--- /dev/null
+++ b/tempest/stress/README.rst
@@ -0,0 +1,47 @@
+Quanta Research Cambridge OpenStack Stress Test System
+======================================================
+
+Nova is a distributed, asynchronous system that is prone to race condition
+bugs. These bugs will not be easily found during
+functional testing but will be encountered by users in large deployments in a
+way that is hard to debug. The stress test tries to cause these bugs to happen
+in a more controlled environment.
+
+
+Environment
+------------
+This particular framework assumes your working Nova cluster understands Nova
+API 2.0. The stress tests can read the logs from the cluster. To enable this,
+provide the hostname of the controller on which 'nova-manage' can be run, and
+the private key and user name used to ssh into the cluster, in the
+[stress] section of tempest.conf. You also need to provide the
+location of the log files:
+
+	target_logfiles = "regexp to all log files to be checked for errors"
+	target_private_key_path = "private ssh key for controller and log file nodes"
+	target_ssh_user = "username for controller and log file nodes"
+	target_controller = "hostname or ip of controller node (for nova-manage)"
+	log_check_interval = "time between checking logs for errors (default 60s)"
+
+
+
+Running the sample test
+-----------------------
+
+To test installation, do the following (from the tempest/stress directory):
+
+	./run_stress.py etc/sample-test.json -d 30
+
+This sample test tries to create a few VMs and kill a few VMs.
+
+
+Additional Tools
+----------------
+
+Sometimes the tests don't finish, or there are failures. In these
+cases, you may want to clean out the nova cluster. We have provided
+some scripts to do this in the ``tools`` subdirectory.
+You can use the following script to destroy any keypairs,
+floating ips, and servers:
+
+tempest/stress/tools/cleanup.py
diff --git a/tempest/stress/__init__.py b/tempest/stress/__init__.py
new file mode 100644
index 0000000..1caf74a
--- /dev/null
+++ b/tempest/stress/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2013 Quanta Research Cambridge, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
diff --git a/tempest/stress/actions/__init__.py b/tempest/stress/actions/__init__.py
new file mode 100644
index 0000000..1caf74a
--- /dev/null
+++ b/tempest/stress/actions/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2013 Quanta Research Cambridge, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
diff --git a/tempest/stress/actions/create_destroy_server.py b/tempest/stress/actions/create_destroy_server.py
new file mode 100644
index 0000000..44b149f
--- /dev/null
+++ b/tempest/stress/actions/create_destroy_server.py
@@ -0,0 +1,49 @@
+# Copyright 2013 Quanta Research Cambridge, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+from tempest.common.utils.data_utils import rand_name
+
+
+def create_destroy(manager, logger):
+    """Create and delete servers in an endless loop.
+
+    Each iteration boots a server with a random name from the configured
+    image and flavor, waits for it to become ACTIVE, deletes it and waits
+    for its termination. The loop never returns normally; the caller is
+    expected to terminate the process running it.
+
+    :param manager: client manager providing ``config`` and
+        ``servers_client``
+    :param logger: logger used to report progress
+    :raises AssertionError: if a create or delete request returns an
+        unexpected status code
+    """
+    servers_client = manager.servers_client
+    image = manager.config.compute.image_ref
+    flavor = manager.config.compute.flavor_ref
+    while True:
+        name = rand_name("instance")
+        logger.info("creating %s", name)
+        resp, server = servers_client.create_server(name, image, flavor)
+        # Check the status before reading the body so that an unexpected
+        # response is reported as such rather than as a missing 'id' key.
+        assert resp.status == 202
+        server_id = server['id']
+        servers_client.wait_for_server_status(server_id, 'ACTIVE')
+        logger.info("created %s", server_id)
+        logger.info("deleting %s", name)
+        resp, _ = servers_client.delete_server(server_id)
+        assert resp.status == 204
+        servers_client.wait_for_server_termination(server_id)
+        logger.info("deleted %s", server_id)
diff --git a/tempest/stress/cleanup.py b/tempest/stress/cleanup.py
new file mode 100644
index 0000000..b2cb70a
--- /dev/null
+++ b/tempest/stress/cleanup.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2013 Quanta Research Cambridge, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+import logging
+
+from tempest import clients
+
+LOG = logging.getLogger(__name__)
+
+
+def _best_effort_each(description, items, action):
+    """Apply action to every item, logging failures instead of raising."""
+    for item in items:
+        try:
+            action(item)
+        except Exception as exc:
+            # Keep going with the remaining items, but leave a trace of
+            # what could not be cleaned up.
+            LOG.warning("cleanup: could not %s %s: %s",
+                        description, item, exc)
+
+
+def cleanup():
+    """Remove the resources left behind by stress test runs.
+
+    Using admin credentials, deletes the servers of all tenants (and waits
+    for them to terminate), the keypairs and floating ips returned by the
+    admin clients' list calls, and every user and tenant whose name starts
+    with "stress_user" or "stress_tenant" respectively.
+
+    Deleting servers, keypairs and floating ips is best effort: a failure
+    is logged and the next item is tried. Failures while listing resources
+    or while deleting users and tenants propagate to the caller.
+    """
+    admin_manager = clients.AdminManager()
+    servers_client = admin_manager.servers_client
+    keypairs_client = admin_manager.keypairs_client
+    floating_ips_client = admin_manager.floating_ips_client
+    identity_client = admin_manager.identity_client
+
+    _, body = servers_client.list_servers({"all_tenants": True})
+    servers = body['servers']
+    # Request every deletion before waiting on any of them.
+    _best_effort_each("delete server", servers,
+                      lambda s: servers_client.delete_server(s['id']))
+    _best_effort_each(
+        "wait for termination of server", servers,
+        lambda s: servers_client.wait_for_server_termination(s['id']))
+
+    _, keypairs = keypairs_client.list_keypairs()
+    _best_effort_each("delete keypair", keypairs,
+                      lambda k: keypairs_client.delete_keypair(k['name']))
+
+    _, floating_ips = floating_ips_client.list_floating_ips()
+    _best_effort_each(
+        "delete floating ip", floating_ips,
+        lambda f: floating_ips_client.delete_floating_ip(f['id']))
+
+    _, users = identity_client.get_users()
+    for user in users:
+        if user['name'].startswith("stress_user"):
+            identity_client.delete_user(user['id'])
+
+    _, tenants = identity_client.list_tenants()
+    for tenant in tenants:
+        if tenant['name'].startswith("stress_tenant"):
+            identity_client.delete_tenant(tenant['id'])
diff --git a/tempest/stress/driver.py b/tempest/stress/driver.py
new file mode 100644
index 0000000..51f159d
--- /dev/null
+++ b/tempest/stress/driver.py
@@ -0,0 +1,190 @@
+# Copyright 2013 Quanta Research Cambridge, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+import importlib
+import logging
+import multiprocessing
+import time
+
+from tempest import clients
+from tempest.common import ssh
+from tempest.common.utils.data_utils import rand_name
+from tempest import exceptions
+from tempest.stress import cleanup
+
+admin_manager = clients.AdminManager()
+
+# log everything (DEBUG and above) to a file that is truncated on every run
+logging.basicConfig(
+    format='%(asctime)s %(process)d %(name)-20s %(levelname)-8s %(message)s',
+    datefmt='%m-%d %H:%M:%S',
+    filename="stress.debug.log",
+    filemode="w",
+    level=logging.DEBUG,
+)
+
+# define a Handler which writes INFO messages or higher to sys.stderr
+# (the default stream of a StreamHandler)
+_console = logging.StreamHandler()
+_console.setLevel(logging.INFO)
+# set a format which is simpler for console use
+format_str = '%(asctime)s %(process)d %(name)-20s: %(levelname)-8s %(message)s'
+_formatter = logging.Formatter(format_str)
+# tell the handler to use this format
+_console.setFormatter(_formatter)
+# add the handler to the stress test logger
+logger = logging.getLogger('tempest.stress')
+logger.addHandler(_console)
+
+
+def do_ssh(command, host):
+    """Run a command on a host over ssh.
+
+    :param command: command line to execute on the host
+    :param host: hostname or ip of the machine to connect to
+    :returns: the result of ``exec_command``, or None when the ssh user or
+        private key is not configured or the command failed
+    """
+    username = admin_manager.config.stress.target_ssh_user
+    key_filename = admin_manager.config.stress.target_private_key_path
+    if not (username and key_filename):
+        return None
+    ssh_client = ssh.Client(host, username, key_filename=key_filename)
+    try:
+        return ssh_client.exec_command(command)
+    except exceptions.SSHExecCommandFailed:
+        return None
+
+
+def _get_compute_nodes(controller):
+    """Return the list of active compute nodes.
+
+    The list is generated by running nova-manage on the controller over
+    ssh; it is empty when that produces no output.
+    """
+    nodes = []
+    cmd = "nova-manage service list | grep ^nova-compute"
+    output = do_ssh(cmd, controller)
+    if not output:
+        return nodes
+    # For example: nova-compute xg11eth0 nova enabled :-) 2011-10-31 18:57:46
+    # This is fragile but there is, at present, no other way to get this info.
+    for line in output.split('\n'):
+        words = line.split()
+        # Skip blank or truncated lines rather than raising IndexError.
+        if len(words) > 4 and words[4] == ":-)":
+            nodes.append(words[1])
+    return nodes
+
+
+def _error_in_logs(logfiles, nodes):
+    """Look for ERROR or TRACE lines in the log files of the given nodes.
+
+    :param logfiles: file pattern handed to egrep on each node
+    :param nodes: hosts to check over ssh
+    :returns: the egrep output of the first node that has matches, or None
+        if no node reported any
+    """
+    grep = 'egrep "ERROR|TRACE" %s' % logfiles
+    for node in nodes:
+        errors = do_ssh(grep, node)
+        # No output from one node (no matches, or ssh failed or is not
+        # configured) must not stop the remaining nodes being checked.
+        if errors:
+            logger.error('%s: %s', node, errors)
+            return errors
+    return None
+
+
+def get_action_function(path):
+    """Return the object named by a dotted path such as 'pkg.mod.func'.
+
+    Everything before the last dot is imported as a module and the part
+    after it is looked up as an attribute of that module.
+    """
+    (module_part, _, function) = path.rpartition('.')
+    return getattr(importlib.import_module(module_part), function)
+
+
+def stress_openstack(tests, duration):
+    """Workload driver. Executes action functions against a nova cluster.
+
+    One process is started per requested thread of every test. While they
+    run, the log files on the compute nodes are checked periodically (when
+    target_logfiles is configured); the run stops at the first error found
+    or once the duration has elapsed.
+
+    :param tests: list of dicts, each with an 'action' (dotted path of the
+        function to run) and optionally 'threads', 'use_admin',
+        'use_isolated_tenants' and 'kwargs'
+    :param duration: number of seconds to run the workload
+    """
+    logfiles = admin_manager.config.stress.target_logfiles
+    log_check_interval = int(admin_manager.config.stress.log_check_interval)
+    computes = []
+    if logfiles:
+        controller = admin_manager.config.stress.target_controller
+        computes = _get_compute_nodes(controller)
+        for node in computes:
+            do_ssh("rm -f %s" % logfiles, node)
+    processes = []
+    for test in tests:
+        if test.get('use_admin', False):
+            manager = admin_manager
+        else:
+            manager = clients.Manager()
+        for _ in xrange(test.get('threads', 1)):
+            if test.get('use_isolated_tenants', False):
+                username = rand_name("stress_user")
+                tenant_name = rand_name("stress_tenant")
+                password = "pass"
+                identity_client = admin_manager.identity_client
+                _, tenant = identity_client.create_tenant(name=tenant_name)
+                identity_client.create_user(username,
+                                            password,
+                                            tenant['id'],
+                                            "email")
+                manager = clients.Manager(username=username,
+                                          password=password,
+                                          tenant_name=tenant_name)
+            target = get_action_function(test['action'])
+            p = multiprocessing.Process(target=target,
+                                        args=(manager, logger),
+                                        kwargs=test.get('kwargs', {}))
+            processes.append(p)
+            p.start()
+    end_time = time.time() + duration
+    had_errors = False
+    while True:
+        remaining = end_time - time.time()
+        if remaining <= 0:
+            break
+        time.sleep(min(remaining, log_check_interval))
+        if not logfiles:
+            continue
+        errors = _error_in_logs(logfiles, computes)
+        if errors:
+            had_errors = True
+            break
+    for p in processes:
+        p.terminate()
+    # terminate() only sends the signal; wait for the workers to exit so
+    # that none of them is still creating resources during cleanup.
+    for p in processes:
+        p.join()
+    # NOTE(review): cleanup is skipped after errors, presumably to keep the
+    # cluster state available for debugging.
+    if not had_errors:
+        logger.info("cleaning up")
+        cleanup.cleanup()
diff --git a/tempest/stress/etc/sample-test.json b/tempest/stress/etc/sample-test.json
new file mode 100644
index 0000000..5a0189c
--- /dev/null
+++ b/tempest/stress/etc/sample-test.json
@@ -0,0 +1,7 @@
+[{"action": "tempest.stress.actions.create_destroy_server.create_destroy",
+  "threads": 8,
+  "use_admin": false,
+  "use_isolated_tenants": false,
+  "kwargs": {}
+  }
+]
diff --git a/tempest/stress/run_stress.py b/tempest/stress/run_stress.py
new file mode 100755
index 0000000..ef0ec8e
--- /dev/null
+++ b/tempest/stress/run_stress.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2013 Quanta Research Cambridge, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+import argparse
+import json
+
+from tempest.stress import driver
+
+
+def main(ns):
+    """Load the test descriptions from a file and run them.
+
+    :param ns: parsed command line arguments; ``ns.tests`` is the path of a
+        JSON file describing the tests and ``ns.duration`` the number of
+        seconds to run them
+    """
+    # Close the file as soon as it is parsed instead of leaking the handle.
+    with open(ns.tests, 'r') as tests_file:
+        tests = json.load(tests_file)
+    driver.stress_openstack(tests, ns.duration)
+
+
+parser = argparse.ArgumentParser(description='Run stress tests. ')
+parser.add_argument('-d', '--duration', default=300, type=int,
+                    help="Duration of test.")
+parser.add_argument('tests', help="Name of the file with test description.")
+
+# Only run when executed as a script, not when the module is imported.
+if __name__ == "__main__":
+    main(parser.parse_args())
diff --git a/tempest/stress/tools/cleanup.py b/tempest/stress/tools/cleanup.py
new file mode 100755
index 0000000..7139d6c
--- /dev/null
+++ b/tempest/stress/tools/cleanup.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+
+# Copyright 2013 Quanta Research Cambridge, Inc.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+from tempest.stress import cleanup
+
+
+# Run the (destructive) cleanup only when executed as a script.
+if __name__ == "__main__":
+    cleanup.cleanup()