Merge "Add steps to install exim4"
diff --git a/tcp_tests/fixtures/ceph_fixtures.py b/tcp_tests/fixtures/ceph_fixtures.py
index c294542..0b2ef50 100644
--- a/tcp_tests/fixtures/ceph_fixtures.py
+++ b/tcp_tests/fixtures/ceph_fixtures.py
@@ -40,7 +40,7 @@
@pytest.fixture(scope='function')
def ceph_deployed(revert_snapshot, request, config,
hardware, underlay, common_services_deployed,
- ceph_actions):
+ salt_deployed, ceph_actions):
"""Fixture to get or install Ceph services on environment
:param revert_snapshot: fixture that reverts snapshot that is specified
@@ -72,6 +72,7 @@
commands = underlay.read_template(steps_path)
ceph_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.ceph_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/common_fixtures.py b/tcp_tests/fixtures/common_fixtures.py
index 9d6ccba..31f0f1e 100644
--- a/tcp_tests/fixtures/common_fixtures.py
+++ b/tcp_tests/fixtures/common_fixtures.py
@@ -52,6 +52,7 @@
finish_step = "FINISH {} TEST. TOOK {} min {} sec".format(
step_name, minutes, seconds
)
+ print("\n\n")
foot = "\n" + "<" * 5 + "#" * 30 + "[ {} ]" + "#" * 30 + ">" * 5
foot = foot.format(finish_step)
LOG.info(foot)
@@ -69,3 +70,10 @@
steps_mark = request.keywords.get('steps', None)
steps = steps_mark.args[0]
return steps
+
+
+@pytest.fixture(scope='function', autouse=True)
+def func_name(request):
+ """Name of the current test function"""
+ return getattr(request.node.function, '_name',
+ request.node.function.__name__)
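+
+
+# Example usage (as in the failover tests in this change; 'test_foo'
+# is an illustrative name):
+#     def test_foo(self, func_name, rally, ...):
+#         rally.run_tempest(pattern='set=smoke', report_prefix=func_name)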
diff --git a/tcp_tests/fixtures/common_services_fixtures.py b/tcp_tests/fixtures/common_services_fixtures.py
index 5d4c56a..7d1c73f 100644
--- a/tcp_tests/fixtures/common_services_fixtures.py
+++ b/tcp_tests/fixtures/common_services_fixtures.py
@@ -71,6 +71,7 @@
commands = underlay.read_template(steps_path)
common_services_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.common_services_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/decapod_fixtures.py b/tcp_tests/fixtures/decapod_fixtures.py
index 7f064c5..8e40b41 100644
--- a/tcp_tests/fixtures/decapod_fixtures.py
+++ b/tcp_tests/fixtures/decapod_fixtures.py
@@ -68,6 +68,7 @@
commands = underlay.read_template(steps_path)
decapod_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.decapod_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/k8s_fixtures.py b/tcp_tests/fixtures/k8s_fixtures.py
index 3cacbaf..356a51b 100644
--- a/tcp_tests/fixtures/k8s_fixtures.py
+++ b/tcp_tests/fixtures/k8s_fixtures.py
@@ -38,7 +38,7 @@
@pytest.mark.revert_snapshot(ext.SNAPSHOT.k8s_deployed)
@pytest.fixture(scope='function')
def k8s_deployed(revert_snapshot, request, config, hardware, underlay,
- common_services_deployed, k8s_actions):
+ common_services_deployed, salt_deployed, k8s_actions):
"""Fixture to get or install k8s on environment
:param revert_snapshot: fixture that reverts snapshot that is specified
@@ -71,6 +71,7 @@
commands = underlay.read_template(steps_path)
k8s_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.k8s_deployed)
+ salt_deployed.sync_time()
# Workaround for keepalived hang issue after env revert from snapshot
# see https://mirantis.jira.com/browse/PROD-12038
diff --git a/tcp_tests/fixtures/openstack_fixtures.py b/tcp_tests/fixtures/openstack_fixtures.py
index 98e367c..8e92e77 100644
--- a/tcp_tests/fixtures/openstack_fixtures.py
+++ b/tcp_tests/fixtures/openstack_fixtures.py
@@ -41,7 +41,7 @@
@pytest.fixture(scope='function')
def openstack_deployed(revert_snapshot, request, config,
hardware, underlay, common_services_deployed,
- openstack_actions):
+ salt_deployed, openstack_actions, rally):
"""Fixture to get or install OpenStack services on environment
:param revert_snapshot: fixture that reverts snapshot that is specified
@@ -52,11 +52,13 @@
:param underlay: fixture provides underlay manager
:param common_services_deployed: fixture provides CommonServicesManager
:param openstack_actions: fixture provides OpenstackManager instance
+ :param rally: fixture provides RallyManager instance
:rtype: OpenstackManager
If config.openstack.openstack_installed is not set, this fixture assumes
that the openstack services were not installed, and does the following:
- install openstack services
+ - [optional] prepare docker with rally container
- make snapshot with name 'openstack_deployed'
- return OpenstackManager instance
@@ -64,15 +66,40 @@
the openstack services were already installed, and does the following:
- return OpenstackManager instance
+ If you want to prepare 'rally', please use mark:
+ @pytest.mark.with_rally(rally_node=<str>,
+ prepare_openstack=<bool>,
+ prepare_tempest=<bool>)
+ :param rally_node: first chars of the node name where rally should
+ be started
+ :param prepare_openstack: if True, prepare OpenStack objects for
+ rally tasks: cirros image, private net04
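+ :param prepare_tempest: if True, call rally.prepare_tempest_task()
+ before starting the container (currently a no-op, see
+ prepare_tempest_task below)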
+
If you want to revert 'openstack_deployed' snapshot, please use mark:
@pytest.mark.revert_snapshot("openstack_deployed")
"""
+
# Deploy Openstack cluster
if not config.openstack.openstack_installed:
steps_path = config.openstack_deploy.openstack_steps_path
commands = underlay.read_template(steps_path)
openstack_actions.install(commands)
+
+ # If @pytest.mark.with_rally() is set, then prepare Rally
+ # container for 'openstack_deployed' snapshot.
+ with_rally = request.keywords.get('with_rally', None)
+ if with_rally:
+ prepare_openstack = with_rally.kwargs.get("prepare_openstack",
+ False)
+ prepare_tempest = with_rally.kwargs.get("prepare_tempest", False)
+ if prepare_openstack:
+ rally.prepare_rally_task(target_node='ctl01')
+ if prepare_tempest:
+ rally.prepare_tempest_task()
+ rally.run_container()
+
hardware.create_snapshot(ext.SNAPSHOT.openstack_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/oss_fixtures.py b/tcp_tests/fixtures/oss_fixtures.py
index d46427b..95bbc54 100644
--- a/tcp_tests/fixtures/oss_fixtures.py
+++ b/tcp_tests/fixtures/oss_fixtures.py
@@ -68,6 +68,7 @@
commands = underlay.read_template(steps_path)
oss_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.oss_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
@@ -78,3 +79,18 @@
pass
return oss_actions
+
+
+@pytest.mark.revert_snapshot(ext.SNAPSHOT.oss_deployed)
+@pytest.fixture(scope='function')
+def oss_sl_os_deployed(revert_snapshot,
+ sl_os_deployed,
+ oss_deployed):
+ """Fixture to get or install SL and OpenStack services on environment
+
+ Uses fixtures openstack_deployed and sl_deployed, with 'sl_deployed'
+ top-level snapshot.
+
+ Returns SLManager instance object
+ """
+ return oss_deployed
diff --git a/tcp_tests/fixtures/rally_fixtures.py b/tcp_tests/fixtures/rally_fixtures.py
index 18fa179..335ab88 100644
--- a/tcp_tests/fixtures/rally_fixtures.py
+++ b/tcp_tests/fixtures/rally_fixtures.py
@@ -18,13 +18,19 @@
@pytest.fixture(scope='function')
-def rally(config, underlay):
+def rally(request, config, underlay):
"""Fixture that provides various actions for TCP
+ :param request: fixture provides pytest data
:param config: fixture provides oslo.config
:param underlay: fixture provides underlay manager
:rtype: RallyManager
For use in tests or fixtures to deploy a custom TCP
"""
- return rallymanager.RallyManager(underlay, config.salt.salt_master_host)
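+ # Sketch of the mark handling: a test decorated with, for example,
+ # @pytest.mark.with_rally(rally_node='cfg01.') gets the rally container
+ # on the node whose name starts with that prefix ('cfg01.' is only an
+ # illustrative value; the default is 'gtw01.').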
+ with_rally = request.keywords.get('with_rally', None)
+ rally_node = "gtw01."
+ if with_rally:
+ rally_node = with_rally.kwargs.get("rally_node", "gtw01.")
+
+ return rallymanager.RallyManager(underlay, rally_node)
diff --git a/tcp_tests/fixtures/salt_fixtures.py b/tcp_tests/fixtures/salt_fixtures.py
index d72b1fc..aff28dc 100644
--- a/tcp_tests/fixtures/salt_fixtures.py
+++ b/tcp_tests/fixtures/salt_fixtures.py
@@ -78,6 +78,7 @@
for n in config.underlay.ssh)]
hardware.create_snapshot(ext.SNAPSHOT.salt_deployed)
+ salt_actions.sync_time()
else:
# 1. hardware environment created and powered on
@@ -87,4 +88,6 @@
# installed TCP API endpoint
pass
+ salt_actions.sync_time()
+
return salt_actions
diff --git a/tcp_tests/fixtures/stacklight_fixtures.py b/tcp_tests/fixtures/stacklight_fixtures.py
index 8028383..c1747b8 100644
--- a/tcp_tests/fixtures/stacklight_fixtures.py
+++ b/tcp_tests/fixtures/stacklight_fixtures.py
@@ -39,7 +39,7 @@
@pytest.fixture(scope='function')
def sl_deployed(revert_snapshot, request, config,
hardware, underlay, common_services_deployed,
- sl_actions):
+ salt_deployed, sl_actions):
"""Fixture to get or install SL services on environment
:param revert_snapshot: fixture that reverts snapshot that is specified
@@ -57,6 +57,7 @@
commands = underlay.read_template(steps_path)
sl_actions.install(commands)
hardware.create_snapshot(ext.SNAPSHOT.sl_deployed)
+ salt_deployed.sync_time()
else:
# 1. hardware environment created and powered on
diff --git a/tcp_tests/fixtures/underlay_fixtures.py b/tcp_tests/fixtures/underlay_fixtures.py
index a1476e3..eacbec9 100644
--- a/tcp_tests/fixtures/underlay_fixtures.py
+++ b/tcp_tests/fixtures/underlay_fixtures.py
@@ -199,7 +199,7 @@
@pytest.fixture(scope='function', autouse=True)
-def grab_versions(request, underlay):
+def grab_versions(request, func_name, underlay):
"""Fixture for grab package versions at the end of test
Marks:
@@ -211,11 +211,10 @@
grab_version = request.keywords.get('grab_versions', None)
def test_fin():
- default_name = getattr(request.node.function, '_name',
- request.node.function.__name__)
- if hasattr(request.node, 'rep_call') and request.node.rep_call.passed \
+ if hasattr(request.node, 'rep_call') and \
+ (request.node.rep_call.passed or request.node.rep_call.failed)\
and grab_version:
artifact_name = utils.extract_name_from_mark(grab_version) or \
- "{}".format(default_name)
+ "{}".format(func_name)
underlay.get_logs(artifact_name)
request.addfinalizer(test_fin)
diff --git a/tcp_tests/helpers/exceptions.py b/tcp_tests/helpers/exceptions.py
index 259880e..f6c2310 100644
--- a/tcp_tests/helpers/exceptions.py
+++ b/tcp_tests/helpers/exceptions.py
@@ -121,3 +121,15 @@
class BaseImageIsNotSet(BaseException):
def __str__(self):
return "Base image for creating VMs is not set!"
+
+
+class SaltPillarError(BaseException):
+ def __init__(self, minion_id, pillar, message=''):
+ super(SaltPillarError, self).__init__()
+ self.minion_id = minion_id
+ self.pillar = pillar
+ self.message = message
+
+ def __str__(self):
+ return ("Salt pillar '{0}' error on minion {1}: {2}"
+ .format(self.pillar, self.minion_id, self.message))
diff --git a/tcp_tests/helpers/log_step.py b/tcp_tests/helpers/log_step.py
index 6ba7164..dbffa58 100644
--- a/tcp_tests/helpers/log_step.py
+++ b/tcp_tests/helpers/log_step.py
@@ -60,7 +60,7 @@
"found!".format(step_num, func.__name__))
test_case_steps = parse_test_doc(func.__doc__)['steps']
try:
- LOG.info(" *** [STEP#{0}] {1} ***".format(
+ LOG.info("\n\n*** [STEP#{0}] {1} ***".format(
step_num,
test_case_steps[step_num - 1]))
except IndexError:
diff --git a/tcp_tests/managers/common_services_manager.py b/tcp_tests/managers/common_services_manager.py
index 658657a..c62114d 100644
--- a/tcp_tests/managers/common_services_manager.py
+++ b/tcp_tests/managers/common_services_manager.py
@@ -12,7 +12,11 @@
# License for the specific language governing permissions and limitations
# under the License.
+from tcp_tests.helpers import exceptions
from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
+from tcp_tests import logger
+
+LOG = logger.logger
class CommonServicesManager(ExecuteCommandsMixin):
@@ -32,3 +36,193 @@
self.execute_commands(commands,
label='Install common services')
self.__config.common_services.common_services_installed = True
+
+ def get_keepalived_vip_minion_id(self, vip):
+ """Get minion ID where keepalived VIP is at the moment"""
+ tgt = 'I@keepalived:cluster:enabled:True'
+ grains = 'ip_interfaces'
+ # Refresh grains first
+ self._salt.run_state(tgt, 'saltutil.refresh_grains')
+ # Get grains
+ result = self._salt.get_grains(tgt=tgt, grains=grains)[0]
+ minion_ids = [
+ minion_id for minion_id, interfaces in result.items()
+ for interface, ips in interfaces.items()
+ for ip in ips
+ if ip == vip
+ ]
+ LOG.debug("VIP '{0}' found on minions {1}".format(vip, minion_ids))
+ if len(minion_ids) != 1:
+ raise Exception("VIP {0} is expected on a single node. Actual "
+ "nodes with VIP: {1}".format(vip, minion_ids))
+ return minion_ids[0]
+
+ def get_keepalived_vips(self):
+ tgt = 'I@keepalived:cluster:enabled:True'
+ pillar = 'keepalived:cluster:instance'
+ return self._salt.get_pillar(tgt=tgt, pillar=pillar)[0]
+
+ def check_keepalived_pillar(self):
+ """Check the keepalived pillars for VIPs
+
+ Check for:
+ - the same VIP is used for the same 'virtual_router_id'
+ - the same password is used for the same 'virtual_router_id'
+ - no 'virtual_router_id' or VIP doubles in different
+ keepalived instances on the same node
+ - no 'priority' doubles inside the same 'virtual_router_id'
+ on different nodes
+
+ Pillar data, as returned by get_keepalived_vips(): dict {
+ <minion_id>: {
+ <keepalived instance>: {
+ <address>: str,
+ <password>: str,
+ <virtual_router_id>: int,
+ <priority>: int
+ },
+ ...
+ },
+ }
+ :return dict: {
+ <str:vip1> : {
+ 'instance_name': <str>
+ 'virtual_router_id': <int>,
+ 'password': <str>,
+ 'nodes' : {<str:node1>: <int:priority>,
+ <str:node2>: <int:priority>,
+ ...},
+ },
+ <str:vip2> : { ...
+ },
+ }
+ """
+
+ def check_single_address(vips, minion_id, instance, data):
+ for vip in vips:
+ if vips[vip]['virtual_router_id'] == data['virtual_router_id']\
+ and (vip != data['address'] or
+ vips[vip]['instance_name'] != instance):
+ message = (
+ "'virtual_router_id': {0} for keepalived instance "
+ "{1}: {2} is already used for {3}: {4} on nodes {5}"
+ .format(data['virtual_router_id'],
+ instance, data['address'],
+ vips[vip]['instance_name'],
+ vip,
+ vips[vip]['nodes'].keys())
+ )
+ raise exceptions.SaltPillarError(
+ minion_id,
+ 'keepalived:cluster:instance',
+ message)
+
+ def check_single_router_id(vips, minion_id, instance, data):
+ for vip in vips:
+ if vips[vip]['virtual_router_id'] != data['virtual_router_id']\
+ and vip == data['address']:
+ message = (
+ "'virtual_router_id': {0} for keepalived instance "
+ "{1}: {2} is not the same as for {3}: {4} on nodes {5}"
+ .format(data['virtual_router_id'],
+ instance, data['address'],
+ vips[vip]['instance_name'],
+ vip,
+ vips[vip]['nodes'].keys())
+ )
+ raise exceptions.SaltPillarError(
+ minion_id,
+ 'keepalived:cluster:instance',
+ message)
+
+ pillar_vips = self.get_keepalived_vips()
+ vips = {}
+ for minion_id in pillar_vips:
+ for instance, data in pillar_vips[minion_id].items():
+ address = data['address']
+ password = data['password']
+ virtual_router_id = data['virtual_router_id']
+ priority = data['priority']
+
+ if address not in vips:
+ # Check that there is the same VIP
+ # for the same virtual_router_id
+ check_single_address(vips, minion_id, instance, data)
+
+ # Add new VIP
+ vips[address] = {
+ 'instance_name': instance,
+ 'virtual_router_id': virtual_router_id,
+ 'password': password,
+ 'nodes': {
+ minion_id: priority,
+ }
+ }
+ else:
+ # Check that there is the same virtual_router_id
+ # for the same VIP
+ check_single_router_id(vips, minion_id, instance, data)
+ if vips[address]['password'] != password:
+ message = (
+ "'password': {0} for keepalived instance "
+ "{1}: {2} is not the same as for {3}: {4} on "
+ "nodes {5}".format(data['password'],
+ instance, data['address'],
+ vips[address]['instance_name'],
+ address,
+ vips[address]['nodes'].keys())
+ )
+ raise exceptions.SaltPillarError(
+ minion_id,
+ 'keepalived:cluster:instance',
+ message)
+
+ if any([priority == prio
+ for node, prio in vips[address]['nodes'].items()]):
+ message = (
+ "'priority': {0} for keepalived instance "
+ "{1}: {2} is the same as for {3}: {4} on "
+ "nodes {5}".format(data['priority'],
+ instance, data['address'],
+ vips[address]['instance_name'],
+ address,
+ vips[address]['nodes'].keys())
+ )
+ raise exceptions.SaltPillarError(
+ minion_id,
+ 'keepalived:cluster:instance',
+ message)
+
+ # Add data to the vips
+ vips[address]['nodes'][minion_id] = priority
+
+ LOG.debug("keepalived pillars check passed: {0}".format(vips))
+ return vips
+
+ def get_haproxy_status(self, tgt):
+ """Get haproxy status for all backends on a specified minion"""
+ cmd = ("echo 'show stat' | "
+ "socat 'UNIX-CONNECT:/run/haproxy/admin.sock' STDIO")
+ # Query the haproxy stats socket on the specified minion
+ res = self._salt.run_state(tgt, 'cmd.run', cmd)
+ output = res[0]['return'][0]
+ assert len(output.keys()) == 1, "Please specify a single minion in tgt"
+ minion_id = output.keys()[0]
+
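+ # 'show stat' output is CSV, one line per <proxy>,<server> pair;
+ # the column indexes below follow the haproxy CSV stats format
+ # (scur=4, smax=5, status=17, rate=33).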
+ haproxy_status = {}
+ for line in output[minion_id].splitlines():
+ if line.startswith("#"):
+ continue
+ status = line.split(",")
+ pxname = status[0]
+ svname = status[1]
+ if pxname not in haproxy_status:
+ haproxy_status[pxname] = {}
+ haproxy_status[pxname][svname] = {
+ 'scur': status[4], # sessions current
+ 'smax': status[5], # sessions max
+ 'status': status[17], # status: UP or DOWN
+ 'rate': status[33], # sessions rate
+ }
+ LOG.debug("Haproxy status: \n{0}".format(haproxy_status))
+ return haproxy_status
diff --git a/tcp_tests/managers/rallymanager.py b/tcp_tests/managers/rallymanager.py
index dcf4309..8282bcc 100644
--- a/tcp_tests/managers/rallymanager.py
+++ b/tcp_tests/managers/rallymanager.py
@@ -14,7 +14,8 @@
import datetime
import json
-from junit_xml import TestSuite, TestCase
+from devops import error
+from functools32 import lru_cache
from tcp_tests import logger
from tcp_tests import settings
@@ -26,147 +27,253 @@
class RallyManager(object):
"""docstring for RallyManager"""
- image_name = 'rallyforge/rally'
- image_version = '0.9.1'
+ image_name = (
+ 'docker-prod-virtual.docker.mirantis.net/'
+ 'mirantis/oscore/rally-tempest')
+ image_version = 'latest'
+ tempest_tag = "16.0.0"
+ designate_tag = "0.2.0"
- def __init__(self, underlay, admin_host):
+ def __init__(self, underlay, rally_node='gtw01.'):
super(RallyManager, self).__init__()
- self._admin_host = admin_host
self._underlay = underlay
+ self._node_name = self.get_target_node(target=rally_node)
- def prepare(self):
- content = """
-sed -i 's|#swift_operator_role = Member|swift_operator_role=SwiftOperator|g' /etc/rally/rally.conf # noqa
-source /home/rally/openrc
-rally-manage db recreate
-rally deployment create --fromenv --name=tempest
-rally verify create-verifier --type tempest --name tempest-verifier
-rally verify configure-verifier
-rally verify configure-verifier --show
-"""
- cmd = "cat > {path} << EOF\n{content}\nEOF".format(
- path='/root/rally/install_tempest.sh', content=content)
- cmd1 = "chmod +x /root/rally/install_tempest.sh"
- cmd2 = "scp ctl01:/root/keystonercv3 /root/rally/openrc"
+ @property
+ @lru_cache(maxsize=None)
+ def image_id(self):
+ LOG.info("Getting image id")
+ cmd = ("docker images | grep {0}| grep {1}| awk '{{print $3}}'"
+ .format(self.image_name, self.image_version))
+ res = self._underlay.check_call(cmd, node_name=self._node_name)
+ image_id = res['stdout'][0].strip()
+ LOG.info("Image ID is {}".format(image_id))
+ return image_id
- with self._underlay.remote(host=self._admin_host) as remote:
- LOG.info("Create rally workdir")
- remote.check_call('mkdir -p /root/rally')
- LOG.info("Create install_tempest.sh")
- remote.check_call(cmd)
- LOG.info("Chmod +x install_tempest.sh")
- remote.check_call(cmd1)
- LOG.info("Copy openstackrc")
- remote.check_call(cmd2)
+ @property
+ @lru_cache(maxsize=None)
+ def docker_id(self):
+ cmd = ("docker ps | grep {image_id} | "
+ "awk '{{print $1}}'| head -1").format(
+ image_id=self.image_id)
+ LOG.info("Getting container id")
+ res = self._underlay.check_call(cmd, node_name=self._node_name)
+ docker_id = res['stdout'][0].strip()
+ LOG.info("Container ID is {}".format(docker_id))
+ return docker_id
- def pull_image(self, version=None):
- version = version or self.image_version
- image = self.image_name
- cmd = ("apt-get -y install docker.io &&"
- " docker pull {image}:{version}".format(image=image,
- version=version))
- with self._underlay.remote(host=self._admin_host) as remote:
- LOG.info("Pull {image}:{version}".format(image=image,
- version=version))
- remote.check_call(cmd)
+ # TODO: move this method to the underlay manager
+ def get_target_node(self, target='gtw01.'):
+ return [node_name for node_name
+ in self._underlay.node_names()
+ if node_name.startswith(target)][0]
- with self._underlay.remote(host=self._admin_host) as remote:
- LOG.info("Getting image id")
- cmd = "docker images | grep {0}| awk '{print $3}'".format(
- self.image_version)
- res = remote.check_call(cmd)
- self.image_id = res['stdout'][0].strip()
- LOG.info("Image ID is {}".format(self.image_id))
+ def _docker_exec(self, cmd, timeout=None, verbose=False):
+ docker_cmd = ('docker exec -i {docker_id} bash -c "{cmd}"'
+ .format(cmd=cmd, docker_id=self.docker_id))
+ LOG.info("Executing: {docker_cmd}".format(docker_cmd=docker_cmd))
+ return self._underlay.check_call(docker_cmd, node_name=self._node_name,
+ verbose=verbose, timeout=timeout)
- def run(self):
- with self._underlay.remote(host=self._admin_host) as remote:
- cmd = ("docker run --net host -v /root/rally:/home/rally "
- "-tid -u root {image_id}".format(image_id=self.image_id))
+ def _run(self):
+ """Start the rally container in the background"""
+ with self._underlay.remote(node_name=self._node_name) as remote:
+ cmd = ("docker run --net host -v /root/rally:/home/rally/.rally "
+ "-v /etc/ssl/certs/:/etc/ssl/certs/ "
+ "-tid -u root --entrypoint /bin/bash {image_id}"
+ .format(image_id=self.image_id))
LOG.info("Run Rally container")
remote.check_call(cmd)
- cmd = ("docker ps | grep {image_id} | "
- "awk '{{print $1}}'| head -1").format(
- image_id=self.image_id)
- LOG.info("Getting container id")
- res = remote.check_call(cmd)
- self.docker_id = res['stdout'][0].strip()
- LOG.info("Container ID is {}".format(self.docker_id))
+ def run_container(self, version=None):
+ """Install docker, configure and run rally container"""
+ version = version or self.image_version
+ image = self.image_name
+ LOG.info("Pull {image}:{version}".format(image=image,
+ version=version))
+ cmd = ("apt-get -y install docker.io &&"
+ " docker pull {image}:{version}".format(image=image,
+ version=version))
+ self._underlay.check_call(cmd, node_name=self._node_name)
- def run_tempest(self, test=''):
- docker_exec = ('docker exec -i {docker_id} bash -c "{cmd}"')
- commands = [
- docker_exec.format(cmd="./install_tempest.sh",
- docker_id=self.docker_id),
- docker_exec.format(
- cmd="source /home/rally/openrc && "
- "rally verify start {test}".format(test=test),
- docker_id=self.docker_id),
- docker_exec.format(
- cmd="rally verify report --type json --to result.json",
- docker_id=self.docker_id),
- docker_exec.format(
- cmd="rally verify report --type html --to result.html",
- docker_id=self.docker_id),
+ LOG.info("Create rally workdir")
+ cmd = 'mkdir -p /root/rally; chown 65500 /root/rally'
+ self._underlay.check_call(cmd, node_name=self._node_name)
+
+ LOG.info("Copy keystonercv3")
+ cmd = "cp /root/keystonercv3 /root/rally/keystonercv3"
+ self._underlay.check_call(cmd, node_name=self._node_name)
+ self._run()
+
+ LOG.info("Create rally deployment")
+ self._docker_exec("rally-manage db recreate")
+ self._docker_exec("source /home/rally/.rally/keystonercv3;"
+ "rally deployment create --fromenv --name=Abathur")
+ self._docker_exec("rally deployment list")
+
+ def prepare_rally_task(self, target_node='ctl01.'):
+ """Prepare cirros image and private network for rally task"""
+ ctl_node_name = self._underlay.get_target_node_names(
+ target=target_node)[0]
+ cmds = [
+ ". keystonercv3 ; openstack flavor create --public m1.tiny",
+ ("wget http://download.cirros-cloud.net/0.3.4/"
+ "cirros-0.3.4-i386-disk.img"),
+ (". /root/keystonercv3; glance --timeout 120 image-create "
+ "--name cirros-disk --visibility public --disk-format qcow2 "
+ "--container-format bare --progress "
+ "< /root/cirros-0.3.4-i386-disk.img"),
+ ". /root/keystonercv3; neutron net-create net04",
]
- with self._underlay.remote(host=self._admin_host) as remote:
- LOG.info("Run tempest inside Rally container")
- for cmd in commands:
- remote.check_call(cmd, verbose=True)
- def get_results(self, store=True, store_file='tempest.xml'):
- LOG.info('Storing tests results...')
- res_file_name = 'result.json'
- file_prefix = 'results_' + datetime.datetime.now().strftime(
- '%Y%m%d_%H%M%S') + '_'
- file_dst = '{0}/{1}{2}'.format(
- settings.LOGS_DIR, file_prefix, res_file_name)
- with self._underlay.remote(host=self._admin_host) as remote:
- remote.download(
- '/root/rally/{0}'.format(res_file_name),
- file_dst)
- res = json.load(remote.open('/root/rally/result.json'))
- if not store:
- return res
+ for cmd in cmds:
+ self._underlay.check_call(cmd, node_name=ctl_node_name)
- formatted_tc = []
- failed_cases = [res['test_cases'][case]
- for case in res['test_cases']
- if res['test_cases'][case]['status']
- in 'fail']
- for case in failed_cases:
- if case:
- tc = TestCase(case['name'])
- tc.add_failure_info(case['traceback'])
- formatted_tc.append(tc)
+ def prepare_tempest_task(self):
+ """Configure rally.conf for tempest tests"""
+ pass
+# LOG.info("Modify rally.conf")
+# cmd = ("sed -i 's|#swift_operator_role = Member|"
+# "swift_operator_role=SwiftOperator|g' "
+# "/etc/rally/rally.conf")
+# self._docker_exec(cmd)
- skipped_cases = [res['test_cases'][case]
- for case in res['test_cases']
- if res['test_cases'][case]['status'] in 'skip']
- for case in skipped_cases:
- if case:
- tc = TestCase(case['name'])
- tc.add_skipped_info(case['reason'])
- formatted_tc.append(tc)
+ def create_rally_task(self, task_path, task_content):
+ """Create a file with rally task definition
- error_cases = [res['test_cases'][case] for case in res['test_cases']
- if res['test_cases'][case]['status'] in 'error']
+ :param task_path: path to JSON or YAML file on target node
+ :task_content: string with json or yaml content to store in file
+ """
+ cmd = "cat > {task_path} << EOF\n{task_content}\nEOF".format(
+ task_path=task_path, task_content=task_content)
+ self._underlay.check_call(cmd, node_name=self._node_name)
- for case in error_cases:
- if case:
- tc = TestCase(case['name'])
- tc.add_error_info(case['traceback'])
- formatted_tc.append(tc)
+ def run_task(self, task='', timeout=None, raise_on_timeout=True,
+ verbose=False):
+ """Run rally task
- success = [res['test_cases'][case] for case in res['test_cases']
- if res['test_cases'][case]['status'] in 'success']
- for case in success:
- if case:
- tc = TestCase(case['name'])
- formatted_tc.append(tc)
-
- ts = TestSuite("tempest", formatted_tc)
- with open(store_file, 'w') as f:
- ts.to_file(f, [ts], prettyprint=False)
-
+ :param task: path to json or yaml file with the task definition
+ :param timeout: timeout in seconds for the task run, passed to the
+ underlay check_call
+ :param raise_on_timeout: bool, ignore TimeoutError if False
+ :param verbose: show rally output to console if True
+ """
+ try:
+ res = self._docker_exec(
+ "rally task start {task}".format(task=task),
+ timeout=timeout,
+ verbose=verbose)
+ except error.TimeoutError:
+ if raise_on_timeout:
+ raise
+ else:
+ res = None
return res
+
+ # Updated to replace the OpenStackManager method run_tempest
+ def run_tempest(self, conf_name='/var/lib/lvm_mcp.conf',
+ pattern='set=smoke', concurrency=0, timeout=None,
+ report_prefix='', report_types=None):
+ """Run tempest tests
+
+ :param conf_name: tempest config placed in the rally container
+ :param pattern: tempest testcase name or one of existing 'set=...'
+ :param concurrency: how many threads to use in parallel. 0 means
+ to take the amount of the cores on the node
+ <self._node_name>.
+ :param timeout: stop tempest tests after specified timeout.
+ :param report_prefix: str, prefix for report filenames. Usually the
+ output of the fixture 'func_name'
+ :param report_types: list of the report types that need to download
+ from the environment: ['html', 'xml', 'json'].
+ None by default.
+ """
+ report_types = report_types or []
+
+ cmd = (
+ "cat > /root/rally/install_tempest.sh << EOF\n"
+ "rally verify create-verifier"
+ " --type tempest "
+ " --name tempest-verifier"
+ " --source /var/lib/tempest"
+ " --version {tempest_tag}"
+ " --system-wide\n"
+ "rally verify add-verifier-ext"
+ " --source /var/lib/designate-tempest-plugin"
+ " --version {designate_tag}\n"
+ "rally verify configure-verifier --extend {tempest_conf}\n"
+ "rally verify configure-verifier --show\n"
+ "EOF".format(tempest_tag=self.tempest_tag,
+ designate_tag=self.designate_tag,
+ tempest_conf=conf_name))
+ with self._underlay.remote(node_name=self._node_name) as remote:
+ LOG.info("Create install_tempest.sh")
+ remote.check_call(cmd)
+ remote.check_call("chmod +x /root/rally/install_tempest.sh")
+
+ LOG.info("Run tempest inside Rally container")
+ self._docker_exec("/home/rally/.rally/install_tempest.sh")
+ self._docker_exec(
+ ("source /home/rally/.rally/keystonercv3 && "
+ "rally verify start --skip-list /var/lib/mcp_skip.list "
+ " --concurrency {concurrency} --pattern {pattern}"
+ .format(concurrency=concurrency, pattern=pattern)),
+ timeout=timeout, verbose=True)
+ if report_prefix:
+ report_filename = '{0}_report_{1}'.format(
+ report_prefix,
+ datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
+ else:
+ report_filename = 'report_{0}'.format(
+ datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
+ docker_file_prefix = '/home/rally/.rally/' + report_filename
+
+ # Create reports
+ if 'xml' in report_types:
+ self._docker_exec(
+ "rally verify report --type junit-xml --to {0}.xml"
+ .format(docker_file_prefix))
+ if 'html' in report_types:
+ self._docker_exec(
+ "rally verify report --type html --to {0}.html"
+ .format(docker_file_prefix))
+ # Always create report in JSON to return results into test case
+ # However, it won't be downloaded unless 'json' is in report_types
+ self._docker_exec("rally verify report --type json --to {0}.json"
+ .format(docker_file_prefix))
+
+ # Download reports to the settings.LOGS_DIR
+ file_src_prefix = '/root/rally/{0}'.format(report_filename)
+ file_dst_prefix = '{0}/{1}'.format(settings.LOGS_DIR, report_filename)
+ with self._underlay.remote(node_name=self._node_name) as remote:
+ for suffix in report_types:
+ remote.download(file_src_prefix + '.' + suffix,
+ file_dst_prefix + '.' + suffix)
+ res = json.load(remote.open(file_src_prefix + '.json'))
+
+ # Get the latest verification ID to find the latest testcases in the report
+ vtime = {vdata['finished_at']: vid
+ for vid, vdata in res['verifications'].items()}
+ vlatest_id = vtime[max(vtime.keys())]
+
+ # Each status has the dict with pairs:
+ # <status>: {
+ # <case_name>: <case_details>,
+ # }
+ formatted_tc = {
+ 'success': {},
+ 'fail': {},
+ 'xfail': {},
+ 'skip': {}
+ }
+
+ for tname, tdata in res['tests'].items():
+ status = tdata['by_verification'][vlatest_id]['status']
+ details = tdata['by_verification'][vlatest_id].get('details', '')
+ if status not in formatted_tc:
+ # Fail if tempest returns a new status that may need
+ # to be taken into account in test cases
+ raise Exception("Unknown testcase {0} status: {1} "
+ .format(tname, status))
+ formatted_tc[status][tdata['name']] = details
+ LOG.debug("Formatted testcases: {0}".format(formatted_tc))
+ return formatted_tc
diff --git a/tcp_tests/managers/saltmanager.py b/tcp_tests/managers/saltmanager.py
index 4a58e93..1ff5324 100644
--- a/tcp_tests/managers/saltmanager.py
+++ b/tcp_tests/managers/saltmanager.py
@@ -17,7 +17,8 @@
from collections import defaultdict
from datetime import datetime
-from pepper.libpepper import Pepper
+from pepper import libpepper
+from tcp_tests.helpers import utils
from tcp_tests import settings
from tcp_tests import logger
from tcp_tests.managers.execute_commands import ExecuteCommandsMixin
@@ -94,7 +95,7 @@
url = "http://{host}:{port}".format(
host=self.host, port=self.port)
LOG.info("Connecting to Salt API {0}".format(url))
- self.__api = Pepper(url)
+ self.__api = libpepper.Pepper(url)
self.__session_start = login()
return self.__api
@@ -158,6 +159,10 @@
result = self.local(tgt=tgt, fun='pillar.get', args=pillar)
return result['return']
+ def get_grains(self, tgt, grains):
+ result = self.local(tgt=tgt, fun='grains.get', args=grains)
+ return result['return']
+
def get_ssh_data(self):
"""Generate ssh config for Underlay
@@ -192,3 +197,30 @@
host(k, next(i for i in v['ipv4'] if i in pool_net))
for k, v in hosts.items()
if next(i for i in v['ipv4'] if i in pool_net)]
+
+ def service_status(self, tgt, service):
+ result = self.local(tgt=tgt, fun='service.status', args=service)
+ return result['return']
+
+ def service_restart(self, tgt, service):
+ result = self.local(tgt=tgt, fun='service.restart', args=service)
+ return result['return']
+
+ def service_stop(self, tgt, service):
+ result = self.local(tgt=tgt, fun='service.stop', args=service)
+ return result['return']
+
+ @utils.retry(3, exception=libpepper.PepperException)
+ def sync_time(self, tgt='*'):
+ LOG.info("NTP time sync on the salt minions '{0}'".format(tgt))
+ # Force authentication update on the next API access
+ # because previous authentication most probably is not valid
+ # before or after time sync.
+ self.__api = None
+ self.run_state(
+ tgt,
+ 'cmd.run', 'service ntp stop; ntpd -gq; service ntp start')
+ new_time_res = self.run_state(tgt, 'cmd.run', 'date')
+ for node_name, time in sorted(new_time_res[0]['return'][0].items()):
+ LOG.info("{0}: {1}".format(node_name, time))
+ self.__api = None
diff --git a/tcp_tests/managers/underlay_ssh_manager.py b/tcp_tests/managers/underlay_ssh_manager.py
index 8b3da06..7d3da96 100644
--- a/tcp_tests/managers/underlay_ssh_manager.py
+++ b/tcp_tests/managers/underlay_ssh_manager.py
@@ -363,12 +363,13 @@
with self.remote(node_name=host) as remote:
remote.upload(source, destination)
- def get_random_node(self):
+ def get_random_node(self, node_names=None):
"""Get random node name
+ :param node_names: list of node names to choose from
+ (defaults to all underlay nodes)
:return: str, name of node
"""
- return random.choice(self.node_names())
+ return random.choice(node_names or self.node_names())
def yaml_editor(self, file_path, node_name=None, host=None,
address_pool=None):
@@ -421,21 +422,24 @@
t='{0}_log.tar.gz'.format(artifact_name), d='/var/log'))
minion_nodes = [ssh for ssh in self.config_ssh
if node_role not in ssh['roles']]
- for node in minion_nodes:
- try:
- with self.remote(host=node['host']) as r_node:
- r_node.check_call((
- 'tar '
- '--absolute-names '
- '--warning=no-file-changed '
- '-czf {t} {d}'.format(
- t='{0}.tar.gz'.format(node['node_name']),
- d='/var/log')),
- verbose=True, raise_on_err=False)
- except Exception:
- LOG.info("Can not ssh for node {}".format(node))
+
with self.remote(master_node['node_name']) as r:
for node in minion_nodes:
+ LOG.info("Archiving logs on the node {0}"
+ .format(node['node_name']))
+ r.check_call((
+ "salt '{n}*' cmd.run "
+ "'tar "
+ "--absolute-names "
+ "--warning=no-file-changed "
+ "-czf {t} {d}'".format(
+ n=node['node_name'],
+ t='{0}.tar.gz'.format(node['node_name']),
+ d='/var/log')),
+ raise_on_err=False)
+
+ LOG.info("Copying logs from {0} to {1}"
+ .format(node['node_name'], master_node['node_name']))
packages_minion_cmd = ("salt '{0}*' cmd.run "
"'dpkg -l' > /var/log/"
"{0}_packages.output".format(
@@ -443,9 +447,58 @@
r.check_call(packages_minion_cmd)
r.check_call("rsync {0}:/root/*.tar.gz "
"/var/log/".format(node['node_name']),
- verbose=True, raise_on_err=False)
- r.check_call(cmd)
+ raise_on_err=False)
+ r.check_call(cmd)
r.check_call(tar_cmd)
- r.download(destination='{0}_log.tar.gz'.format(artifact_name),
- target=os.getcwd())
+
+ destination_name = '{0}_log.tar.gz'.format(artifact_name)
+ LOG.info("Downloading the artifact {0}".format(destination_name))
+ r.download(destination=destination_name, target=os.getcwd())
+
+ def delayed_call(
+ self, cmd,
+ node_name=None, host=None, address_pool=None,
+ verbose=True, timeout=5,
+ delay_min=None, delay_max=None):
+ """Delayed call of the specified command in background
+
+ :param delay_min: minimum delay in minutes before run
+ the command
+ :param delay_max: maximum delay in minutes before run
+ the command
+ The command will be started at random time in the range
+ from delay_min to delay_max in minutes from 'now'
+ using the command 'at'.
+
+ 'now' is rounded down to whole minutes by the 'at' command, i.e.:
+ now(28 min 59 sec) == 28 min 00 sec.
+
+ So, if delay_min=1, the command may start anywhere in the range
+ from 1 sec to 60 sec after the call.
+
+ If delay_min and delay_max are None, then the command will
+ be executed in the background right now.
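+
+ Example (as used in the failover tests in this change):
+ underlay.delayed_call(
+ "salt 'ctl*' service.restart keepalived",
+ host=config.salt.salt_master_host,
+ delay_min=2, delay_max=3)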
+ """
+ time_min = delay_min or delay_max
+ time_max = delay_max or delay_min
+
+ delay = None
+ if time_min is not None and time_max is not None:
+ delay = random.randint(time_min, time_max)
+
+ delay_str = ''
+ if delay:
+ delay_str = " + {0} min".format(delay)
+
+ delay_cmd = "cat << EOF | at now {0}\n{1}\nEOF".format(delay_str, cmd)
+
+ self.check_call(delay_cmd, node_name=node_name, host=host,
+ address_pool=address_pool, verbose=verbose,
+ timeout=timeout)
+
+ def get_target_node_names(self, target='gtw01.'):
+ """Get all node names which names starts with <target>"""
+ return [node_name for node_name
+ in self.node_names()
+ if node_name.startswith(target)]
diff --git a/tcp_tests/tests/system/conftest.py b/tcp_tests/tests/system/conftest.py
index a4a72a2..64288ab 100644
--- a/tcp_tests/tests/system/conftest.py
+++ b/tcp_tests/tests/system/conftest.py
@@ -32,6 +32,7 @@
'show_step',
'revert_snapshot',
'snapshot',
+ 'func_name',
# config_fixtures
'config',
# underlay_fixtures
@@ -51,6 +52,7 @@
# oss_fixtures
'oss_actions',
'oss_deployed',
+ 'oss_sl_os_deployed',
# decapod_fixtures
'decapod_actions',
'decapod_deployed',
diff --git a/tcp_tests/tests/system/test_failover_openstack_services.py b/tcp_tests/tests/system/test_failover_openstack_services.py
new file mode 100644
index 0000000..37cff72
--- /dev/null
+++ b/tcp_tests/tests/system/test_failover_openstack_services.py
@@ -0,0 +1,602 @@
+# Copyright 2017 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from tcp_tests import logger
+
+LOG = logger.logger
+
+
+def rally_load_task(times=10, concurrency=2):
+ return """{{
+ "NovaServers.boot_and_delete_server": [
+ {{
+ "args": {{
+ "flavor": {{
+ "name": "m1.tiny"
+ }},
+ "image": {{
+ "name": "^cirros.*-disk$"
+ }},
+ "auto_assign_nic": true
+ }},
+ "runner": {{
+ "type": "constant",
+ "times": {times},
+ "concurrency": {concurrency}
+ }},
+ "context": {{
+ "users": {{
+ "tenants": 3,
+ "users_per_tenant": 2
+ }},
+ "network": {{
+ "start_cidr": "10.2.0.0/24",
+ "networks_per_tenant": 2
+ }}
+ }}
+ }}
+ ]
+ }}""".format(times=times, concurrency=concurrency)
+
+
+class TestFailoverOpenStackServices(object):
+ """Test class for testing MCP services failover"""
+
+ def show_failed_msg(self, failed):
+ return "There are failed tempest tests:\n\n {0}".format(
+ '\n\n '.join([(name + ': ' + detail)
+ for name, detail in failed.items()]))
+
+ def create_and_run_rally_load_task(
+ self, rally, times, concurrency, timeout, raise_on_timeout=False):
+
+ rally.create_rally_task('/root/rally/rally_load_task.json',
+ rally_load_task(times, concurrency))
+ LOG.info("Running rally load task: {0} iterations with concurrency {1}"
+ ", timeout: {2} sec".format(times, concurrency, timeout))
+
+ # Run rally task with created task file
+ res = rally.run_task('/home/rally/.rally/rally_load_task.json',
+ timeout=timeout,
+ raise_on_timeout=raise_on_timeout,
+ verbose=False)
+ # LOG only lines related to the task iterations,
+ # skip all other setup/teardown messages
+ for line in res['stdout']:
+ if 'rally.task.runner' in line:
+ LOG.info(line.strip())
+
+ def get_ps_time(self, underlay, process_name, node_names):
+ """Get the started datetime of the process on the specified nodes
+
+ Returns the dict {<node_name>: <str>, } where <str> is the 'ps' output
+ """
+ res = {
+ node_name: underlay.check_call(
+ "ps -eo lstart,cmd|grep [^]]{0}".format(process_name),
+ node_name=node_name, raise_on_err=False)['stdout_str']
+ for node_name in node_names
+ }
+ return res
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+ def test_restart_keepalived(self, func_name, underlay, config,
+ openstack_deployed, sl_os_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test restart keepalived on ctl* nodes
+
+ Scenario:
+ 1. Set keepalived to restart on ctl* nodes in few minutes
+ 2. Run rally task to generate load (some tasks should fail
+ because of step 1)
+ 3. Check that keepalived was restarted on ctl* nodes
+ 4. Run tempest smoke after failover
+ 5. Check tempest report for failed tests
+
+ Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ """
+ # TR case #4756965
+ common_services_actions.check_keepalived_pillar()
+ salt = salt_actions
+
+ ctl_node_names = underlay.get_target_node_names(
+ target='ctl')
+
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+ assert all(["keepalived" in p for n, p in ps_before.items()]), (
+ "'keepalived' is not running on some nodes: {0}".format(ps_before))
+
+ # STEP #1
+ show_step(1)
+ underlay.delayed_call(
+ "salt 'ctl*' service.restart keepalived",
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ # STEP #2
+ show_step(2)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=6, timeout=900)
+
+ # STEP #3
+ show_step(3)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "keepalived")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+ for node_name, ps in ps_before.items():
+ assert ps_after[node_name] and (ps != ps_after[node_name]), (
+ "Keepalived wasn't restarted on node {0}".format(node_name))
+
+ # STEP #4
+ show_step(4)
+ results = rally.run_tempest(pattern='set=smoke',
+ report_prefix=func_name,
+ timeout=1800)
+ # Step #5
+ show_step(5)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+ def test_stop_keepalived(self, func_name, underlay, config,
+ openstack_deployed, sl_os_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test stop keepalived on ctl node with VIP under load
+
+ Scenario:
+ 1. Find controller minion id with VIP
+ 2. Set keepalived to stop on the ctl node with VIP in few minutes
+ 3. Run rally task to generate load (some tasks should fail
+ because of step 2)
+ 4. Check that keepalived was stopped on the ctl node with VIP
+ 5. Run tempest smoke after failover
+ 6. Check tempest report for failed tests
+
+ Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ """
+ # TR case #3385682
+ common_services_actions.check_keepalived_pillar()
+ salt = salt_actions
+
+ ctl_node_names = underlay.get_target_node_names(
+ target='ctl')
+
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+ assert all(["keepalived" in p for n, p in ps_before.items()]), (
+ "'keepalived' is not running on some nodes: {0}".format(ps_before))
+
+ # STEP #1
+ show_step(1)
+ ctl_vip_pillar = salt.get_pillar(
+ tgt="I@nova:controller:enabled:True",
+ pillar="_param:cluster_vip_address")[0]
+ vip = [vip for minion_id, vip in ctl_vip_pillar.items()][0]
+ minion_vip = common_services_actions.get_keepalived_vip_minion_id(vip)
+ LOG.info("VIP {0} is on {1}".format(vip, minion_vip))
+
+ # STEP #2
+ show_step(2)
+ underlay.delayed_call(
+ "salt '{0}' service.stop keepalived".format(minion_vip),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ # STEP #3
+ show_step(3)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=6, timeout=900)
+
+ # STEP #4
+ show_step(4)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "keepalived")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == minion_vip:
+ # Check that keepalived actually stopped on <minion_vip> node
+ assert not ps_after[node_name], (
+ "Keepalived was not stopped on node {0}"
+ .format(minion_vip))
+ else:
+ # Check that keepalived on other ctl nodes was not restarted
+ assert ps == ps_after[node_name], (
+ "Keepalived was restarted while it shouldn't!")
+
+ # STEP #5
+ show_step(5)
+ results = rally.run_tempest(pattern='set=smoke',
+ report_prefix=func_name,
+ timeout=1800)
+ # Step #6
+ show_step(6)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+ def test_kill_keepalived(self, func_name, underlay, config,
+ openstack_deployed, sl_os_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test kill keepalived and haproxy on ctl node with VIP under load
+
+ Scenario:
+ 1. Find controller minion id with VIP
+ 2. Set keepalived to be killed on the ctl node with VIP
+ in few minutes, TR case #3385683
+ 3. Run rally task to generate load (some tasks should fail
+ because of step 2)
+ 4. Check that keepalived was killed on the ctl node with VIP
+ 5. Check that SL sent an e-mail notification about the failed
+ keepalived service, and then remove the VIP remaining
+ on the previous VIP node during running rally task with
+ load.
+ 6. Check that VIP was actually migrated on a new node
+ 7. Find controller minion id with migrated VIP
+ 8. Set haproxy to be killed on the ctl node with VIP
+ in few minutes, TR case #4753980
+ 9. Run rally task to generate load (some tasks should fail
+ because of step 8)
+ 10. Check that haproxy was killed on the ctl node with VIP
+ and started again by systemd
+ 11. Run tempest smoke after failover
+ 12. Check tempest report for failed tests
+
+ Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ """
+ common_services_actions.check_keepalived_pillar()
+ salt = salt_actions
+
+ ctl_node_names = underlay.get_target_node_names(
+ target='ctl')
+
+ # Keepalived case
+ # STEP #1
+ show_step(1)
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+ assert all(["keepalived" in p for n, p in ps_before.items()]), (
+ "'keepalived' is not running on some nodes: {0}".format(ps_before))
+
+ ctl_vip_pillar = salt.get_pillar(
+ tgt="I@nova:controller:enabled:True",
+ pillar="_param:cluster_vip_address")[0]
+ vip = [vip for minion_id, vip in ctl_vip_pillar.items()][0]
+ minion_vip = common_services_actions.get_keepalived_vip_minion_id(vip)
+ LOG.info("VIP {0} is on {1}".format(vip, minion_vip))
+
+ # STEP #2
+ show_step(2)
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'killall -9 keepalived'".format(minion_vip),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ LOG.info("'at -l':\n" + underlay.check_call(
+ "at -l", host=config.salt.salt_master_host)['stdout_str'])
+
+ # STEP #3
+ show_step(3)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=4, timeout=900)
+
+ # STEP #4
+ show_step(4)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "keepalived")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "keepalived", ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == minion_vip:
+ # Check that keepalived actually stopped on <minion_vip> node
+ assert not ps_after[node_name], (
+ "Keepalived was not stopped on node {0}"
+ .format(minion_vip))
+ else:
+ # Check that keepalived on other ctl nodes was not restarted
+ assert ps == ps_after[node_name], (
+ "Keepalived was restarted while it shouldn't!")
+ # STEP #5
+ show_step(5)
+ # TODO(ddmitriev):
+ # 5. Check that SL sent an e-mail notification about the failed
+ # keepalived service, and then remove the VIP remaining
+ # on the node after killing keepalived.
+ # Alternative: check prometheus alerts list on mon*:
+ # curl http://localhost:15011/api/v1/alerts
+
+ # Remove the VIP address manually because
+ # the killed keepalived cannot do it
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'ip a d {1}/32 dev ens4'"
+ .format(minion_vip, vip),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=4, timeout=900)
+
+ # STEP #6
+ show_step(6)
+ # Check that VIP has been actually migrated to a new node
+ new_minion_vip = common_services_actions.get_keepalived_vip_minion_id(
+ vip)
+ LOG.info("Migrated VIP {0} is on {1}".format(vip, new_minion_vip))
+ assert new_minion_vip != minion_vip, (
+ "VIP {0} wasn't migrated from {1} after killing keepalived!"
+ .format(vip, new_minion_vip))
+ common_services_actions.check_keepalived_pillar()
+
+ # Haproxy case
+ # STEP #7
+ show_step(7)
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(underlay, "haproxy", ctl_node_names)
+ assert all(["haproxy" in p for n, p in ps_before.items()]), (
+ "'haproxy' is not running on some nodes: {0}".format(ps_before))
+
+ # STEP #8
+ show_step(8)
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'killall -9 haproxy'".format(new_minion_vip),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ LOG.info("'at -l':\n" + underlay.check_call(
+ "at -l", host=config.salt.salt_master_host)['stdout_str'])
+
+ # STEP #9
+ show_step(9)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=200, concurrency=4, timeout=1800)
+
+ # STEP #10
+ show_step(10)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "haproxy")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "haproxy", ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == new_minion_vip:
+ # Check that haproxy has been actually restarted
+ # on <new_minion_vip> node
+ assert ps_after[node_name] and (ps != ps_after[node_name]), (
+ "Haproxy wasn't restarted on node {0}: {1}"
+ .format(node_name, ps_after[node_name]))
+ else:
+ # Check that haproxy on other ctl nodes was not restarted
+ assert ps == ps_after[node_name], (
+ "Haproxy was restarted while it shouldn't on node {0}"
+ .format(node_name))
+
+ # STEP #11
+ show_step(11)
+ results = rally.run_tempest(pattern='set=smoke',
+ report_prefix=func_name,
+ timeout=1800)
+ # Step #12
+ show_step(12)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")
+
+ @pytest.mark.grab_versions
+ @pytest.mark.fail_snapshot
+ @pytest.mark.with_rally(rally_node="gtw01.", prepare_openstack=True)
+ def test_kill_rabbit_galera(self, func_name, underlay, config,
+ openstack_deployed, sl_os_deployed,
+ common_services_actions,
+ salt_actions, openstack_actions,
+ rally, show_step):
+ """Test kill rabbitmq and galera on ctl node with VIP under load
+
+ Scenario:
+ 1. Find controller minion id with VIP
+ 2. Set rabbitmq_server to be killed on a random ctl node
+ in few minutes, TR case #3385677
+ 3. Run rally task to generate load
+ 4. Check that rabbitmq_server was killed on the ctl node with VIP
+ 5. Find controller minion id with Galera which is receiving
+ connections
+ 6. Set mysql server to be killed in few minutes, TR case #4753976
+ 7. Run rally task to generate load
+ 8. Check that mysql was killed and started again by systemd
+ 9. Check galera cluster status and replication
+ 10. Run tempest smoke after failover
+ 11. Check tempest report for failed tests
+
+ Requirements:
+ - Salt cluster
+ - OpenStack cluster
+ """
+ common_services_actions.check_keepalived_pillar()
+ salt = salt_actions
+
+ ctl_node_names = underlay.get_target_node_names(
+ target='ctl')
+
+ # Rabbitmq case
+ # STEP #1
+ show_step(1)
+ # Get the ps output with datetime of the process
+ ps_before = self.get_ps_time(
+ underlay, "rabbitmq_server", ctl_node_names)
+ assert all(["rabbitmq_server" in p for n, p in ps_before.items()]), (
+ "'rabbitmq_server' is not running on some nodes: {0}"
+ .format(ps_before))
+
+ ctl_vip_pillar = salt.get_pillar(
+ tgt="I@nova:controller:enabled:True",
+ pillar="_param:cluster_vip_address")[0]
+ vip = [vip for minion_id, vip in ctl_vip_pillar.items()][0]
+ ctl_minions = ctl_vip_pillar.keys()
+ minion_vip = common_services_actions.get_keepalived_vip_minion_id(vip)
+ LOG.info("VIP {0} is on {1}".format(vip, minion_vip))
+
+ # STEP #2
+ show_step(2)
+
+ ctl_minion = underlay.get_random_node(ctl_minions)
+ ctl_node_name = salt_actions.get_grains(
+ tgt=ctl_minion, grains='fqdn')[0][ctl_minion]
+ LOG.info("Scheduling to kill rabbitmq on the minion {0}"
+ .format(ctl_minion))
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'killall -9 -u rabbitmq'".format(ctl_minion),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ LOG.info("'at -l':\n" + underlay.check_call(
+ "at -l", host=config.salt.salt_master_host)['stdout_str'])
+
+ # STEP #3
+ show_step(3)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=4, timeout=900)
+
+ # STEP #4
+ show_step(4)
+ ps_after = self.get_ps_time(underlay,
+ "rabbitmq_server",
+ ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == ctl_node_name:
+ # Check that rabbitmq_server has been actually stopped
+ # on the <ctl_node_name> node where it was killed
+ assert not ps_after[node_name], (
+ "'rabbitmq_server' was not stopped on node {0}"
+ .format(ctl_node_name))
+ else:
+ # Check that rabbitmq_server on other ctl nodes
+ # was not restarted
+ assert ps == ps_after[node_name], (
+ "'rabbitmq_server' was restarted while it shouldn't!")
+
+ # Mysql case
+ # STEP #5
+ show_step(5)
+ # At first, ensure that mysql is running on all controllers
+ ps_before = self.get_ps_time(
+ underlay, "mysqld", ctl_node_names)
+ assert all(["mysqld" in p for n, p in ps_before.items()]), (
+ "'mysqld' is not running on some nodes: {0}"
+ .format(ps_before))
+
+ # Check haproxy status on the node with VIP and find the mysql backend
+ # which is receiving the connections
+ haproxy_status = common_services_actions.get_haproxy_status(minion_vip)
+ mysql_status = haproxy_status['mysql_cluster']
+ mysql_tgt = ''
+ scur = 0
+ for svname in mysql_status.keys():
+ if svname == "FRONTEND" or svname == "BACKEND":
+ continue
+ snew = int(mysql_status[svname]['scur'])
+ if scur < snew:
+ scur = snew
+ mysql_tgt = svname + '*'
+ assert scur > 0, ("No sessions to 'mysql_cluster' haproxy backend on "
+ "the node with VIP, something wrong with cluster.")
+
+ # STEP #6
+ show_step(6)
+ LOG.info("Scheduling to kill mysqld on the minion {0}"
+ .format(mysql_tgt))
+ underlay.delayed_call(
+ "salt '{0}' cmd.run 'killall -9 -u mysql'".format(mysql_tgt),
+ host=config.salt.salt_master_host,
+ delay_min=2,
+ delay_max=3)
+
+ LOG.info("'at -l':\n" + underlay.check_call(
+ "at -l", host=config.salt.salt_master_host)['stdout_str'])
+
+ # STEP #7
+ show_step(7)
+ # Run rally task with created task file
+ self.create_and_run_rally_load_task(
+ rally, times=60, concurrency=4, timeout=900)
+
+ # STEP #8
+ show_step(8)
+ ret = salt.service_status("I@nova:controller:enabled:True",
+ "mysql")
+ LOG.info(ret)
+ ps_after = self.get_ps_time(underlay, "mysqld", ctl_node_names)
+
+ for node_name, ps in ps_before.items():
+ if node_name == minion_vip:
+ # Check that mysql actually restarted on <minion_vip> node
+ assert ps_after[node_name] and (ps != ps_after[node_name]), (
+ "Mysql wasn't restarted on node {0}: {1}"
+ .format(node_name, ps_after[node_name]))
+ else:
+ # Check that Mysql on other ctl nodes was not restarted
+ assert ps == ps_after[node_name], (
+ "Mysql was restarted while it shouldn't on node {0}"
+ .format(node_name))
+
+ # STEP #9
+ show_step(9)
+ # TODO(ddmitriev): check galera cluster status and replication
+ # like it was checked in OSTF.
+
+ # STEP #10
+ show_step(10)
+ results = rally.run_tempest(pattern='set=smoke',
+ report_prefix=func_name,
+ timeout=1800)
+ # Step #11
+ show_step(11)
+ assert not results['fail'], self.show_failed_msg(results['fail'])
+
+ LOG.info("*************** DONE **************")