Merge "Add zookeeper automation test"
diff --git a/tcp_tests/tests/system/test_backup_restore_zookeeper.py b/tcp_tests/tests/system/test_backup_restore_zookeeper.py
new file mode 100644
index 0000000..3f6b948
--- /dev/null
+++ b/tcp_tests/tests/system/test_backup_restore_zookeeper.py
@@ -0,0 +1,299 @@
+# Copyright 2019 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import time
+
+import pytest
+
+from tcp_tests import logger
+from tcp_tests import settings
+
+LOG = logger.logger
+
+
+class TestBackupRestoreZooKeeper(object):
+ def get_cfg_fqn(self, salt):
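+        # salt.local returns {'return': [{<minion_id>: <fqdn>}]}; the
+        # single key is the Salt master node name used for check_call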
+ salt_master = salt.local("I@salt:master", "network.get_fqdn")
+        return list(salt_master['return'][0])[0]
+
+ def create_network(self, underlay_actions, network_name, cfg_node):
+ underlay_actions.check_call(
+ "source /root/keystonercv3 && "
+ "openstack network create {}".format(network_name),
+ node_name=cfg_node,
+ raise_on_err=False)
+
+ def is_network_restored(self, underlay_actions, network_name, cfg_node):
+ get_net_by_name = underlay_actions.check_call(
+ "source /root/keystonercv3 && " +
+ "openstack network list --name {}".format(network_name),
+ node_name=cfg_node,
+ raise_on_err=False)["stdout"]
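+        # An empty listing prints just a newline, so any other stdout
+        # means the network survived the restore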
+ return get_net_by_name != ['\n']
+
+ @pytest.fixture()
+ def handle_restore_params(self, reclass_actions):
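+        # Enable a one-shot restore from the latest remote backup via
+        # reclass overrides; teardown deletes the keys again so later
+        # state runs do not re-trigger a restore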
+ reclass_actions.add_key(
+ "parameters._param.zookeeper.backup.client.restore_latest",
+ "1",
+ "cluster/*/infra/backup/client_zookeeper.yml")
+ reclass_actions.add_bool_key(
+ "parameters._param.zookeeper.backup.client.enabled",
+ "True",
+ "cluster/*/infra/backup/client_zookeeper.yml")
+ reclass_actions.add_key(
+ "parameters._param.zookeeper.backup.client.restore_from",
+ "remote",
+ "cluster/*/infra/backup/client_zookeeper.yml")
+ yield
+ reclass_actions.delete_key(
+ "parameters._param.zookeeper.backup.client.restore_latest",
+ "cluster/*/infra/backup/client_zookeeper.yml")
+ reclass_actions.delete_key(
+ "parameters._param.zookeeper.backup.client.enabled",
+ "cluster/*/infra/backup/client_zookeeper.yml")
+ reclass_actions.delete_key(
+ "parameters._param.zookeeper.backup.client.restore_from",
+ "cluster/*/infra/backup/client_zookeeper.yml")
+
+ def salt_cmd_on_control(self, salt, cmd):
+ salt.run_state("I@opencontrail:control", "cmd.run", cmd)
+
+ def update_mine_and_grains(self, salt):
+ salt.run_state("I@zookeeper:backup:client", "saltutil.sync_grains")
+ salt.run_state("I@zookeeper:backup:client", "saltutil.mine.flush")
+ salt.run_state("I@zookeeper:backup:client", "saltutil.mine.update")
+
+ def get_leader_node(self, salt):
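+        # Ask ZooKeeper's four-letter-word "stat" command on port 2181
+        # which OpenContrail control node currently holds the leader role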
+ contrail_leader = salt.local(
+ "I@opencontrail:control",
+ "cmd.run",
+ "echo stat | nc localhost 2181 | grep leader")
+ result = contrail_leader['return'][0]
+        for node, leader in result.items():
+            if leader == 'Mode: leader':
+ return node
+ return None
+
+ @pytest.fixture()
+ def create_instant_backup(self):
+
+ def create(salt, leader):
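+            # Sync pillars, grains and mine so the backup client and
+            # server render fresh configs, apply the backup states, then
+            # trigger an immediate backup run on the leader node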
+ salt.run_state("*", "saltutil.refresh_pillar")
+ salt.run_state(
+ "I@zookeeper:backup:client or I@zookeeper:backup:server",
+ "state.sls salt.minion")
+ self.update_mine_and_grains(salt)
+ salt.run_state("I@zookeeper:backup:client",
+ " state.sls openssh.client,zookeeper.backup")
+ salt.run_state("I@zookeeper:backup:server", "zookeeper.backup")
+ backup = salt.run_state(
+ leader,
+ "cmd.run",
+ "bash /usr/local/bin/zookeeper-backup-runner.sh")
+ LOG.info(backup)
+ return create
+
+ @pytest.mark.grab_versions
+ @pytest.mark.parametrize("_", [settings.ENV_NAME])
+ @pytest.mark.run_mcp_update
+ def test_backup_creation(self, salt_actions, show_step,
+ create_instant_backup, _):
+ """ Backup ZooKeeper Database
+ Scenario:
+ 1. Refresh pillars on all the nodes
+ Apply the salt.minion state
+ Refresh grains and mine for the ZooKeeper client node
+ Apply required state on the ZooKeeper client nodes
+ Apply required state on the ZooKeeper server nodes
+ Create an instant backup
+ 2. Verify that a complete backup has been created
+
+ """
+ salt = salt_actions
+ leader = self.get_leader_node(salt)
+ show_step(1)
+ create_instant_backup(salt, leader)
+ show_step(2)
+ backup_on_leader_node = salt.run_state(
+ leader,
+ "cmd.run",
+ "ls /var/backups/zookeeper/full")
+ LOG.info(backup_on_leader_node)
+        leader_results = backup_on_leader_node[0]['return'][0]
+        assert leader_results and all(leader_results.values()), \
+            "Backup is not created on ZooKeeper leader node"
+ backup_on_server_node = salt.run_state(
+ "I@zookeeper:backup:server",
+ "cmd.run",
+ "ls /srv/volumes/backup/zookeeper/full")
+ LOG.info(backup_on_server_node)
+        server_results = backup_on_server_node[0]['return'][0]
+        assert server_results and all(server_results.values()), \
+            "Backup is not created on ZooKeeper server node"
+
+ @pytest.mark.grab_versions
+ @pytest.mark.parametrize("_", [settings.ENV_NAME])
+ @pytest.mark.run_mcp_update
+ def test_restore_zookeeper_with_job(self, salt_actions, reclass_actions,
+ drivetrain_actions, underlay_actions,
+ show_step, create_instant_backup,
+ handle_restore_params, _):
+ """ Restore ZooKeeper Database with Jenkins job
+
+ Scenario:
+            0. Restore from the backup. Prepare restore parameters
+            1. Create a network to be backed up
+            2. Create an instant backup
+            3. Restore from the backup. Add the Jenkins job class for
+               the ZooKeeper restore
+            4. Restore from the backup. Run the Jenkins job
+ """
+ salt = salt_actions
+ reclass = reclass_actions
+ dt = drivetrain_actions
+ fixture_network_name = "testzoo1"
+ cfg_node = self.get_cfg_fqn(salt)
+ leader = self.get_leader_node(salt)
+ jenkins_start_timeout = 60
+ jenkins_build_timeout = 1800
+ show_step(1)
+ self.create_network(underlay_actions, fixture_network_name, cfg_node)
+ show_step(2)
+ create_instant_backup(salt, leader)
+ show_step(3)
+ reclass.add_class(
+ "system.jenkins.client.job.deploy.update.restore_zookeeper",
+ "cluster/*/cicd/control/leader.yml")
+ salt.run_state("I@jenkins:client", "jenkins.client")
+
+ show_step(4)
+ job_name = 'deploy-zookeeper-restore'
+ run_zookeeper_restore = dt.start_job_on_cid_jenkins(
+ start_timeout=jenkins_start_timeout,
+ build_timeout=jenkins_build_timeout,
+ job_name=job_name)
+ assert run_zookeeper_restore == 'SUCCESS'
+ network_presented = self.is_network_restored(
+ underlay_actions,
+ fixture_network_name,
+ cfg_node)
+ assert network_presented, \
+ 'Network {} is not restored'.format(fixture_network_name)
+
+ @pytest.mark.grab_versions
+ @pytest.mark.parametrize("_", [settings.ENV_NAME])
+ @pytest.mark.run_mcp_update
+ def test_restore_zookeeper_manually(self, salt_actions,
+ show_step,
+ underlay_actions,
+ create_instant_backup,
+ handle_restore_params, _):
+ """Restore ZooKeeper Database manually
+
+ Scenario:
+            0. Restore from the backup. Prepare restore parameters
+            1. Create a network to be backed up
+            2. Create an instant backup
+            3. Restore. Stop the config services on the control nodes
+            4. Restore. Stop the control services on the control nodes
+            5. Restore. Stop the ZooKeeper service on the control nodes
+            6. Restore. Remove the ZooKeeper files from the control nodes
+            7. Restore. Run the ZooKeeper backup state
+            8. Restore. Start the ZooKeeper service on the control nodes
+            9. Restore. Start the config services on the control nodes
+            10. Restore. Start the control services on the control nodes
+            11. Restore. Verify that OpenContrail is in the correct state
+ """
+ s = salt_actions
+ fixture_network_name = "testzoo2"
+ leader = self.get_leader_node(s)
+ cfg_node = self.get_cfg_fqn(s)
+        show_step(1)
+        self.create_network(underlay_actions, fixture_network_name, cfg_node)
+ show_step(2)
+ create_instant_backup(s, leader)
+ show_step(3)
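+        # doctrail runs the given command inside the Contrail controller
+        # container on each control node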
+ cmd = "doctrail controller systemctl {} {}"
+ self.salt_cmd_on_control(s, cmd.format("stop", "contrail-api"))
+ self.salt_cmd_on_control(s, cmd.format("stop", "contrail-schema"))
+ self.salt_cmd_on_control(s, cmd.format("stop", "contrail-svc-monitor"))
+ self.salt_cmd_on_control(s, cmd.format("stop",
+ "contrail-device-manager"))
+ self.salt_cmd_on_control(s, cmd.format("stop",
+ "contrail-config-nodemgr"))
+
+ show_step(4)
+ self.salt_cmd_on_control(s, cmd.format("stop", "contrail-control"))
+ self.salt_cmd_on_control(s, cmd.format("stop", "contrail-named"))
+ self.salt_cmd_on_control(s, cmd.format("stop", "contrail-dns"))
+ self.salt_cmd_on_control(s, cmd.format("stop",
+ "contrail-control-nodemgr"))
+ show_step(5)
+ self.salt_cmd_on_control(s,
+ "doctrail controller service zookeeper stop")
+ show_step(6)
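+        # version-2 holds the ZooKeeper snapshots and transaction logs;
+        # clear it so only the restored data remains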
+ self.salt_cmd_on_control(
+ s,
+ "rm -rf /var/lib/config_zookeeper_data/version-2/*")
+ show_step(7)
+ s.run_state("I@opencontrail:control",
+ "cmd.run",
+ "/var/backups/zookeeper/dbrestored")
+ s.run_state("I@opencontrail:control", "state.apply",
+ "zookeeper.backup")
+ show_step(8)
+ self.salt_cmd_on_control(s,
+ "doctrail controller service zookeeper start")
+ show_step(9)
+ self.salt_cmd_on_control(s, cmd.format("start", "contrail-api"))
+ self.salt_cmd_on_control(s, cmd.format("start", "contrail-schema"))
+ self.salt_cmd_on_control(s, cmd.format("start",
+ "contrail-svc-monitor"))
+ self.salt_cmd_on_control(s, cmd.format("start",
+ "contrail-device-manager"))
+ self.salt_cmd_on_control(s, cmd.format("start",
+ "contrail-config-nodemgr"))
+ show_step(10)
+ self.salt_cmd_on_control(s, cmd.format("start", "contrail-control"))
+ self.salt_cmd_on_control(s, cmd.format("start", "contrail-named"))
+ self.salt_cmd_on_control(s, cmd.format("start", "contrail-dns"))
+ self.salt_cmd_on_control(s, cmd.format("start",
+ "contrail-control-nodemgr"))
+ show_step(11)
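+        # Give the restarted Contrail services a minute to converge
+        # before checking the restored network and service statuses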
+ time.sleep(60)
+ network_presented = self.is_network_restored(
+ underlay_actions,
+ fixture_network_name,
+ cfg_node)
+ assert network_presented, \
+ 'Network {} is not restored'.format(fixture_network_name)
+ statuses_ok = True
+ failures = ''
+ statuses = s.run_state(
+ "I@opencontrail:control",
+ "cmd.run",
+ "doctrail controller contrail-status")
+
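+        # contrail-status prints "<service>: <status>" lines grouped
+        # under "== <component> ==" headers; every service must end up
+        # active (or backup for redundant ones)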
+        for node_name, statuses_output in statuses[0]["return"][0].items():
+            for status_line in statuses_output.splitlines():
+                if not status_line.startswith("==") and status_line != '':
+                    service, status = status_line.split(':', 1)
+ status = status.strip()
+ if status not in ["active", "backup"]:
+ statuses_ok = False
+ failures += "On node {} service {} has " \
+ "unexpected status after restore:" \
+ " {} \n".format(node_name,
+ service.strip(),
+ status)
+ assert statuses_ok, failures