#    Copyright 2024 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
import pytest
import time

from si_tests import settings
from si_tests import logger


from si_tests.managers.kaas_manager import Cluster
from si_tests.managers.kaas_manager import Manager
from si_tests.utils import utils, waiters
from kubernetes.client.rest import ApiException
from urllib3.exceptions import MaxRetryError, ProtocolError


# Module-level logger used by every helper and test in this file.
LOG = logger.logger


def is_cluster_management():
    """Return the ``is_management`` flag of the target cluster.

    The cluster is looked up by ``settings.TARGET_CLUSTER`` inside the
    ``settings.TARGET_NAMESPACE`` namespace, using a Manager created from
    ``settings.KUBECONFIG_PATH``.
    """
    kaas_manager = Manager(settings.KUBECONFIG_PATH)
    target_namespace = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    return target_namespace.get_cluster(settings.TARGET_CLUSTER).is_management


# Computed once when the module is imported, because the value is consumed by
# the ``pytest.mark.skipif`` decorators of the pod deletion tests in this file.
# Importing this module therefore calls is_cluster_management().
cluster_is_management = is_cluster_management()


# Every cluster that is not a management cluster is treated as a child cluster.
cluster_is_child = not cluster_is_management


def check_pod_respawn(target_cluster, pods, pod_name_prefix):
    """Check that pods with the given prefix are restored after a deletion.

    First the number of pods is checked through
    ``target_cluster.check.check_pods_number()``, then the pods status is
    checked through ``target_cluster.check.check_k8s_pods()`` (300 seconds
    timeout, polled every 30 seconds).

    :param target_cluster: cluster object that provides the ``check`` helpers
    :param pods: non-empty, indexable collection of pod objects; the
                 namespace of the first pod is used for the number check and
                 the collection itself is forwarded to check_pods_number()
    :param pod_name_prefix: name prefix of the pods to check
    :raises IndexError: if ``pods`` is empty
    """
    # Only the namespace of the first pod is needed, so read it directly
    # instead of collecting the namespace of every pod into a temporary list.
    ns = pods[0].namespace
    LOG.info("Check number of pods after delete procedure")
    target_cluster.check.check_pods_number(pod_name_prefix, ns, pods)
    LOG.info(f"Check status for {pod_name_prefix} pods")
    target_cluster.check.check_k8s_pods(pods_prefix=pod_name_prefix,
                                        timeout=300,
                                        interval=30)


@pytest.mark.parametrize("_", ["CLUSTER_NAME={0}"
                               .format(settings.TARGET_CLUSTER)])
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures("check_heat_stacks_after_test")
def test_ha_stop_containerd(target_cluster: Cluster, cluster_condition_check, _):
    """Stop containerd service on every cluster node (one at a time)

    Precondition - all expected pods and their replicas must be presented
    The following scenario is executed for every node

    Scenario:
        1. SSH to node
        2. Get number of containerd-shim processes and the PIDs of the
           watched services (ovsdb-server, ovs-vswitchd,
           contrail-vrouter-agent) if any were reported for the node
        3. Stop containerd service and make sure it is stopped
        4. Get number of containerd-shim processes and
           compare with previous value (must be the same)
        5. Check that all pods are Running and Ready
        6. Start containerd service (done even when a check has failed)
        7. If PIDs were collected on step 2, check that they did not change

    Expected result - containerd-shim processes are running independently
    from containerd service and all containers in the pods are
    still operational even if containerd is down.
    """
    services = ["ovsdb-server", "ovs-vswitchd", "contrail-vrouter-agent"]
    ssh_key = settings.HA_TEST_PRIVATE_KEY_FILE
    count_shims_cmd = "pgrep -x containerd-shim | wc -l"

    for machine in target_cluster.get_machines():
        LOG.info(f"Accessing {machine.name}")
        shims_before = machine._run_cmd(count_shims_cmd,
                                        ssh_key=ssh_key).stdout_str
        LOG.info(f"There are containerd-shim {shims_before} processes")
        LOG.info("Check do we have mentioned services on node and get their PID if yes")
        pids_before_restart = utils.get_services_pids(machine, services, ssh_key)
        if pids_before_restart:
            LOG.info(f"Services pids before restart: {pids_before_restart}")

        LOG.info("Stopping containerd")
        machine._run_cmd("sudo service containerd stop", ssh_key=ssh_key)
        # Exit code 1 is the expected one here: pgrep returns it when no
        # process matches, i.e. when containerd is not running anymore.
        waiters.wait(lambda: machine._run_cmd("pgrep -x containerd",
                                              check_exit_code=True,
                                              expected_codes=[1],
                                              ssh_key=ssh_key),
                     timeout=600,
                     interval=10,
                     timeout_msg="containerd service wasn't stopped")

        shims_after = machine._run_cmd(count_shims_cmd,
                                       ssh_key=ssh_key).stdout_str
        # Log the freshly collected value, not the one taken before the stop.
        LOG.info(f"Now it is {shims_after} containerd-shim processes")
        try:
            # Shims must survive the containerd stop, so the counts are equal.
            assert int(shims_before) == int(shims_after), (
                f"Number of containerd-shim processes changed on "
                f"{machine.name} after containerd stop: "
                f"{shims_before} before, {shims_after} after")
            target_cluster.check.check_k8s_pods(timeout=1200, interval=30)
        except Exception as e:
            # Log for the test output, then propagate: swallowing the error
            # would let the test pass although the checks have failed.
            LOG.error(e)
            raise
        finally:
            # containerd is started again whatever the outcome of the checks.
            LOG.info("Start containerd")
            machine._run_cmd("sudo service containerd start", ssh_key=ssh_key)

        if pids_before_restart:
            pids_after_restart = utils.get_services_pids(machine, services, ssh_key)
            LOG.info(f"PIDs after restart: {pids_after_restart}")
            assert pids_before_restart == pids_after_restart, \
                (f"Processes {services} restarted during containerd restart\n"
                 f"pids before restart: {pids_before_restart}\n"
                 f"pids after restart: {pids_after_restart}")


@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures("check_heat_stacks_after_test")
def test_ha_kill_containerd_shim(target_cluster, cluster_condition_check, _):
    """Kill all containerd-shim processes on every cluster node

    Precondition - all expected pods and their replicas must be presented
    The following scenario is executed for every node

    Scenario:
        1. SSH to node and send SIGKILL to every process that matches
           'containerd-shim' in the ps output
        2. Wait 60 seconds
        3. Wait until the list of all pods can be fetched (up to 120 seconds)
        4. Check that all pods are Running and Ready

    Expected result - pods and their containers are Running and Ready.
    Number of replicas are the same.
    """
    # Shell loop executed on the node: collect the PIDs of the shim processes
    # from ps (the grep process itself is filtered out) and kill them.
    kill_shims_cmd = ("for i in $(ps uax | grep containerd-shim | "
                      "grep -v grep | awk '{print $2}'); "
                      "do sudo kill -9 $i; done")

    for machine in target_cluster.get_machines():
        LOG.info(f"Accessing {machine.name}")
        machine._run_cmd(kill_shims_cmd,
                         ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE)
        time.sleep(60)

        LOG.info("Waiting for k8s to be ready")
        waiters.wait_pass(
            lambda: target_cluster.k8sclient.pods.list_all(), timeout=120)

        LOG.info("Waiting for pods to be in a correct state")
        target_cluster.check.check_k8s_pods(timeout=1200, interval=30)
        # TODO: maybe check pod restarts also?
        # additional timeout ?


@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_delete_helm_controller_leader_pod(target_cluster, cluster_condition_check, helmbundles_check, _):
    """Delete the leader pod of each helm controller and wait for a new leader

    Precondition - all expected pods and their replicas must be presented
    The following scenario is executed for 'helm-controller' and for
    'stacklight-helm-controller'

    Scenario:
        1. Detect leader
        2. Delete leader pod
        3. Wait till leader was changed (up to 120 seconds)

    Expected result - the reported leader differs from the deleted one.
    """
    def leader_pod_name(prefix):
        # Only the part of the leader identity before the first "_" is
        # compared with pod names.
        return target_cluster.check.get_leader(app_prefix=prefix).split("_")[0]

    def leader_moved_away(prefix, previous_leader):
        if leader_pod_name(prefix) != previous_leader:
            return True
        LOG.info(f"Leader is not changed yet. Should be not {previous_leader}")
        return False

    for controller in ('helm-controller', 'stacklight-helm-controller'):
        candidates = target_cluster.k8sclient.pods.list_all(name_prefix=controller)
        assert len(candidates) > 0, "Pods not found for selected app"
        old_leader = leader_pod_name(controller)

        LOG.info("Get leader pod and delete it")
        for candidate in candidates:
            if candidate.name != old_leader:
                continue
            LOG.info(f"Deleting leader pod: {candidate.name}")
            candidate.delete(timeout=180)

        waiters.wait(lambda: leader_moved_away(controller, old_leader),
                     timeout=120, interval=5)


@pytest.mark.parametrize("_", ["CLUSTER_NAME={0}".format(settings.TARGET_CLUSTER)])
@pytest.mark.parametrize("svc_name,svc_ns,expected_downtime", settings.HA_SL_SVC_TIMEOUT)
def test_ha_sl_svc(target_cluster, svc_name, svc_ns, expected_downtime, _):
    """Delete one pod of a StackLight service and check the service downtime

    The test is skipped for 'iam-proxy-kibana' when logging is not enabled
    on the cluster. When StackLight HA is enabled, the expected downtime is
    taken from settings.HA_SL_SVC_HA_MODE_DOWNTIME instead of the
    parametrized value.

    Scenario:
        1. List pods whose name starts with the service name
        2. Delete the first pod from the list
        3. Wait till pods of the service are Running and Ready
        4. Fetch probe results of the service from Prometheus for the
           duration of the test and calculate the downtime
        5. Compare the downtime with the expected one

    Expected result - pods are recreated, downtime does not exceed the
    expected value.
    """
    if svc_name == "iam-proxy-kibana" and not target_cluster.logging_enabled():
        msg = "SL logging disabled. Kibana service is missing"
        LOG.warning(msg)
        pytest.skip(msg)

    # Start of the time window that is later requested from Prometheus.
    start = time.time()
    if target_cluster.sl_ha_enabled():
        expected_downtime = settings.HA_SL_SVC_HA_MODE_DOWNTIME
        LOG.info("SL HA enabled")
    else:
        LOG.info("SL HA disabled")

    svc_pods = target_cluster.k8sclient.pods.list_starts_with(svc_name, namespace=svc_ns)
    # Fail with a clear message instead of an IndexError on svc_pods[0].
    assert len(svc_pods) > 0, \
        f"Pods with prefix '{svc_name}' not found in namespace '{svc_ns}'"
    target_pod = svc_pods[0]
    LOG.info(f"Delete {target_pod.name} pod")
    target_pod.delete()
    LOG.info(f"Wait until all {svc_name} pod(s) Created and Running")
    target_cluster.check.check_k8s_pods(pods_prefix=svc_name,
                                        target_namespaces=svc_ns,
                                        timeout=300,
                                        interval=30)
    end = time.time()
    client = target_cluster.prometheusclient
    result = client.get_svc_probe_success(namespace=svc_ns, service_name=svc_name, start=start, end=end)
    downtime = target_cluster.check.calculate_k8s_downtime(result)
    utils.check_downtime(downtime, expected_downtime)


@pytest.mark.parametrize("_", ["CLUSTER_NAME={0}"
                               .format(settings.TARGET_CLUSTER)])
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures("check_heat_stacks_after_test")
def test_ha_haproxy(target_cluster: Cluster, cluster_condition_check, _):
    """Check that each node can balance traffic through HAProxy

    We have to check that all HAproxy can handle outage of any nodes/services.
    The test is skipped when the cluster provider is not baremetal.

    Scenario:
        1. Select node to serve public ip. Do for each control node
           (the first node is selected once more at the end)
        2. Stop mcc-keepalived service on other nodes
        3. Check that API works well
        4. Pause ucp-kube-apiserver container on other nodes
        5. Check that API is available
        6. Kill ucp-kube-apiserver and ucp-controller containers on the
           selected node to drop any connection to HAProxy
        7. Check that the containers were restored and k8s API is available
        8. Check that a pod can be scheduled on the current node
        9. Restore k8s API and keepalived on other nodes
        10. Check that API works well

    Whatever the outcome, the ``finally`` section unpauses ucp-kube-apiserver
    and starts mcc-keepalived on every control node and re-checks the cluster.
    """
    if target_cluster.provider is not utils.Provider.baremetal:
        msg = ("\nHA test to check HAProxy LB service were skipped "
               "as cluster doesn't support the feature")
        LOG.info(msg)
        pytest.skip(msg)
        # NOTE(review): pytest.skip() raises, so this return is not reached.
        return

    ssh_user = 'mcc-user'
    cluster_release = target_cluster.clusterrelease_version
    LOG.info(f"Cluster release: {cluster_release}")

    ssh_key_file = settings.HA_TEST_PRIVATE_KEY_FILE
    nodes = target_cluster.get_machines(machine_type="control")
    try:
        LOG.info("Cluster has %s control nodes", [m.name for m in nodes])
        # 1. Select one control node
        # NOTE(review): the first node is appended again, so it is processed
        # twice - presumably to bring the VIP back to it at the end; confirm.
        for one in (nodes + [nodes[0]]):
            info = f"#  Check Haproxy on the node {one.name}  #"
            LOG.info(f"\n{'#' * len(info)}"
                     f"\n{info}"
                     f"\n{'#' * len(info)}")
            # Select other control nodes
            other_nodes = set(nodes) - set([one])
            # 2. Stop Keepalived on other nodes to move VIP to the selected control node
            LOG.info("Turning off Keepalived on %s", [m.name for m in other_nodes])
            for stop_one in other_nodes:
                LOG.info("Stop keepalived service on %s", stop_one.name)
                stop_one._run_cmd(
                    "sudo systemctl stop mcc-keepalived.service",
                    verbose=True,
                    ssh_key=ssh_key_file,
                    ssh_login=ssh_user,
                    reconnect=True)

            # 3. Check API
            target_cluster.check.check_k8s_nodes()
            target_cluster.check.check_actual_expected_pods()

            # 4. Turn off k8s API on other nodes
            LOG.info("Turning off backend k8s API services on %s", [m.name for m in other_nodes])
            for stop_one in other_nodes:
                LOG.info("Pause k8s api service on %s", stop_one.name)
                stop_one._run_cmd(
                    "sudo docker pause ucp-kube-apiserver",
                    verbose=True,
                    ssh_key=ssh_key_file,
                    ssh_login=ssh_user,
                    reconnect=True)

            LOG.info("Wait 10 seconds before HAProxy reacts to stop api services")
            time.sleep(10)

            # 5. Check API availability
            # The machine list is fetched again and compared by name with the
            # list obtained before any service was touched.
            LOG.info("Getting list of nodes to check API availability")
            assert_nodes = target_cluster.get_machines(machine_type="control")

            assert set(n.name for n in nodes) == set(n.name for n in assert_nodes), \
                ("Can't fetch list of nodes. It seems that haproxy can't handle "
                 f"service off on {other_nodes}")
            LOG.info(f"Response with machines: {[n.name for n in assert_nodes]}")

            # 6. Simulate haproxy <-> ucp-controller connection failure
            #    MKE containers must be restarted automatically in a few seconds
            one._run_cmd(
                "sudo docker kill ucp-kube-apiserver; sudo docker kill ucp-controller",
                verbose=True,
                ssh_key=ssh_key_file,
                ssh_login=ssh_user,
                reconnect=True)

            # 7. Check that MKE containers were restarted and k8s API is available
            #    get_machines() is retried for up to 600 seconds; the listed
            #    exception types are passed to wait_pass() as the expected ones.
            LOG.info(f"\n\nWait until k8s API becomes available again "
                     f"and VIP is returned on the machine {one.name}\n")
            waiters.wait_pass(
                target_cluster.get_machines, timeout=600, interval=10,
                expected=(ApiException, MaxRetryError, ProtocolError),
                timeout_msg=f"MKE services were not automatically restarted on the machine {one.name}")

            LOG.info("\n\n*** VIP is restored ***\n")
            LOG.info("Get the list of machines to check API availability after stopping MKE services")
            assert_nodes = target_cluster.get_machines(machine_type="control")
            assert set(n.name for n in nodes) == set(n.name for n in assert_nodes), \
                ("Can't fetch list of machines. It seems that haproxy can't handle "
                 f"service off on {other_nodes}")
            LOG.info(f"Response with machines: {[n.name for n in assert_nodes]}")

            # 8. Check that a pod can be scheduled on the current node
            #    Use exec_pod_cmd() to run a simple pod
            # NOTE(review): fixed 5 minute pause without any log message;
            # confirm that it is required.
            time.sleep(300)
            # NOTE(review): "docker ls" is not a docker CLI subcommand
            # ("docker ps" lists containers) - confirm the intended command and
            # whether exec_pod_cmd() checks the exit status.
            one.exec_pod_cmd("sudo docker ls | grep ucp-kube-apiserver", verbose=True)

            # 9. Restore k8s API and keepalived on all nodes
            LOG.info("Recover k8s api services on %s", [m.name for m in other_nodes])
            for start_one in other_nodes:
                LOG.info("unpause k8s api service on %s", start_one.name)
                start_one._run_cmd(
                    "sudo docker unpause ucp-kube-apiserver",
                    verbose=True,
                    ssh_key=ssh_key_file,
                    ssh_login=ssh_user,
                    reconnect=True)

            target_cluster.check.check_k8s_nodes()
            target_cluster.check.check_actual_expected_pods()

            LOG.info("Turning on Keepalived on %s", [m.name for m in other_nodes])
            for start_one in other_nodes:
                LOG.info("Start Keepalived service on %s", start_one.name)
                start_one._run_cmd(
                    "sudo systemctl start mcc-keepalived.service",
                    verbose=True,
                    ssh_key=ssh_key_file,
                    ssh_login=ssh_user,
                    reconnect=True)

            LOG.info("Wait 30 seconds before HAProxy reacts to start api services. "
                     "Should give time to restore clustered pods.")
            time.sleep(30)

            # 10. Check API
            target_cluster.check.check_k8s_nodes()
            target_cluster.check.check_actual_expected_pods()

            LOG.info("Finish checking HAProxy service on %s", one.name)
    finally:
        # Best-effort recovery: "|| true" keeps the shell exit status at 0 even
        # when the container is not paused or the service is already running.
        LOG.banner("Restore k8s and keepalived services on all machines")
        for one in nodes:
            LOG.info(f"Unpause k8s service on the node {one.name}")
            one._run_cmd(
                "sudo docker unpause ucp-kube-apiserver || true",
                verbose=True,
                ssh_key=ssh_key_file,
                ssh_login=ssh_user,
                reconnect=True)
            LOG.info(f"Start Keepalived service on the node {one.name}")
            one._run_cmd(
                "sudo systemctl start mcc-keepalived.service || true",
                verbose=True,
                ssh_key=ssh_key_file,
                ssh_login=ssh_user,
                reconnect=True)
        target_cluster.check.check_k8s_nodes()
        target_cluster.check.check_actual_expected_pods()


@pytest.mark.skipif(cluster_is_management, reason="We should skip current HA test for mgmt cluster")
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_delete_openvswitch_pods(target_cluster, cluster_condition_check, _):
    """Delete openvswitch pods one by one and check that they are respawned

    Skipped when the target cluster is a management cluster.

    Scenario:
        1. List the pods whose name starts with 'openvswitch'
        2. Delete the pods one at a time (180 seconds delete timeout)
        3. After every deletion run check_pod_respawn() for this prefix
        4. List the pods again and log them

    Expected result - pods are recreated, number of replicas is restored.
    """
    prefix = 'openvswitch'
    initial_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods before delete tests: {initial_pods}")

    for victim in initial_pods:
        LOG.info(f"Deleting pod: {victim.name} with pod_name_prefix: {prefix}")
        victim.delete(timeout=180)
        # The list captured before the deletions is handed to the check.
        check_pod_respawn(target_cluster, initial_pods, prefix)

    respawned_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods after: {respawned_pods}")
    # Add custom checks below


@pytest.mark.skipif(cluster_is_management, reason="We should skip current HA test for mgmt cluster")
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_delete_libvirt_pods(target_cluster, cluster_condition_check, _):
    """Delete libvirt pods one by one and check that they are respawned

    Skipped when the target cluster is a management cluster.

    Scenario:
        1. List the pods whose name starts with 'libvirt-libvirt-default'
        2. Delete the pods one at a time (180 seconds delete timeout)
        3. After every deletion run check_pod_respawn() for this prefix
        4. List the pods again and log them

    Expected result - pods are recreated, number of replicas is restored.
    """
    prefix = 'libvirt-libvirt-default'
    initial_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods before delete tests: {initial_pods}")

    for victim in initial_pods:
        LOG.info(f"Deleting pod: {victim.name} with pod_name_prefix: {prefix}")
        victim.delete(timeout=180)
        # The list captured before the deletions is handed to the check.
        check_pod_respawn(target_cluster, initial_pods, prefix)

    respawned_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods after: {respawned_pods}")
    # Add custom checks below


@pytest.mark.skipif(cluster_is_management, reason="We should skip current HA test for mgmt cluster")
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_delete_mariadb_pods(target_cluster, cluster_condition_check, _):
    """Delete mariadb pods one by one and check that they are respawned

    Skipped when the target cluster is a management cluster.

    Scenario:
        1. List the pods whose name starts with 'mariadb'
        2. Delete the pods one at a time (180 seconds delete timeout)
        3. After every deletion run check_pod_respawn() for this prefix
        4. List the pods again and log them

    Expected result - pods are recreated, number of replicas is restored.
    """
    prefix = 'mariadb'
    initial_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods before delete tests: {initial_pods}")

    for victim in initial_pods:
        LOG.info(f"Deleting pod: {victim.name} with pod_name_prefix: {prefix}")
        victim.delete(timeout=180)
        # The list captured before the deletions is handed to the check.
        check_pod_respawn(target_cluster, initial_pods, prefix)

    respawned_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods after: {respawned_pods}")
    # Add custom checks below


@pytest.mark.skipif(cluster_is_management, reason="We should skip current HA test for mgmt cluster")
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_delete_etcd_pods(target_cluster, cluster_condition_check, _):
    """Delete etcd pods one by one and check that they are respawned

    Skipped when the target cluster is a management cluster.

    Scenario:
        1. List the pods whose name starts with 'etcd-etcd'
        2. Delete the pods one at a time (180 seconds delete timeout)
        3. After every deletion run check_pod_respawn() for this prefix
        4. List the pods again and log them

    Expected result - pods are recreated, number of replicas is restored.
    """
    prefix = 'etcd-etcd'
    initial_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods before delete tests: {initial_pods}")

    for victim in initial_pods:
        LOG.info(f"Deleting pod: {victim.name} with pod_name_prefix: {prefix}")
        victim.delete(timeout=180)
        # The list captured before the deletions is handed to the check.
        check_pod_respawn(target_cluster, initial_pods, prefix)

    respawned_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods after: {respawned_pods}")
    # Add custom checks below


@pytest.mark.skipif(cluster_is_child, reason="We should skip current HA test for child cluster")
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_delete_iam_pods(target_cluster, cluster_condition_check, _):
    """Delete iam-keycloak pods one by one and check that they are respawned

    Skipped when the target cluster is not a management cluster.

    Scenario:
        1. List the pods whose name starts with 'iam-keycloak'
        2. Delete the pods one at a time (180 seconds delete timeout)
        3. After every deletion run check_pod_respawn() for this prefix
        4. List the pods again and log them

    Expected result - pods are recreated, number of replicas is restored.
    """
    prefix = 'iam-keycloak'
    initial_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods before delete tests: {initial_pods}")

    for victim in initial_pods:
        LOG.info(f"Deleting pod: {victim.name} with pod_name_prefix: {prefix}")
        victim.delete(timeout=180)
        # The list captured before the deletions is handed to the check.
        check_pod_respawn(target_cluster, initial_pods, prefix)

    respawned_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods after: {respawned_pods}")
    # Add custom checks below


@pytest.mark.skipif(cluster_is_child, reason="We should skip current HA test for child cluster")
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_delete_mcc_cache_pods(target_cluster, cluster_condition_check, _):
    """Delete mcc-cache pods one by one and check that they are respawned

    Skipped when the target cluster is not a management cluster.

    Scenario:
        1. List the pods whose name starts with 'mcc-cache'
        2. Delete the pods one at a time (180 seconds delete timeout)
        3. After every deletion run check_pod_respawn() for this prefix
        4. List the pods again and log them

    Expected result - pods are recreated, number of replicas is restored.
    """
    prefix = 'mcc-cache'
    initial_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods before delete tests: {initial_pods}")

    for victim in initial_pods:
        LOG.info(f"Deleting pod: {victim.name} with pod_name_prefix: {prefix}")
        victim.delete(timeout=180)
        # The list captured before the deletions is handed to the check.
        check_pod_respawn(target_cluster, initial_pods, prefix)

    respawned_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods after: {respawned_pods}")
    # Add custom checks below


@pytest.mark.skipif(cluster_is_child, reason="We should skip current HA test for child cluster")
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_delete_dhcp_relay_pods(target_cluster, cluster_condition_check, _):
    """Delete dhcp-relay pods one by one and check that they are respawned

    Skipped when the target cluster is not a management cluster.

    Scenario:
        1. List the pods whose name starts with 'dhcp-relay'
        2. Delete the pods one at a time (180 seconds delete timeout)
        3. After every deletion run check_pod_respawn() for this prefix
        4. List the pods again and log them

    Expected result - pods are recreated, number of replicas is restored.
    """
    prefix = 'dhcp-relay'
    initial_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods before delete tests: {initial_pods}")

    for victim in initial_pods:
        LOG.info(f"Deleting pod: {victim.name} with pod_name_prefix: {prefix}")
        victim.delete(timeout=180)
        # The list captured before the deletions is handed to the check.
        check_pod_respawn(target_cluster, initial_pods, prefix)

    respawned_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods after: {respawned_pods}")
    # Add custom checks below


@pytest.mark.skipif(cluster_is_child, reason="We should skip current HA test for child cluster")
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
def test_ha_baremetal_operator_relay_pods(target_cluster, cluster_condition_check, _):
    """Delete baremetal-operator pods one by one and check that they are respawned

    Skipped when the target cluster is not a management cluster.

    Scenario:
        1. List the pods whose name starts with 'baremetal-operator'
        2. Delete the pods one at a time (180 seconds delete timeout)
        3. After every deletion run check_pod_respawn() for this prefix
        4. List the pods again and log them

    Expected result - pods are recreated, number of replicas is restored.
    """
    prefix = 'baremetal-operator'
    initial_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods before delete tests: {initial_pods}")

    for victim in initial_pods:
        LOG.info(f"Deleting pod: {victim.name} with pod_name_prefix: {prefix}")
        victim.delete(timeout=180)
        # The list captured before the deletions is handed to the check.
        check_pod_respawn(target_cluster, initial_pods, prefix)

    respawned_pods = target_cluster.k8sclient.pods.list_starts_with(prefix)
    LOG.info(f"Pods after: {respawned_pods}")
    # Add custom checks below
