#    Copyright 2022 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
import random

import pytest

from si_tests import logger
from si_tests import settings
from si_tests.managers.kaas_manager import Machine, Cluster, Namespace
from si_tests.managers.openstack_manager import OpenStackManager
from si_tests.utils import waiters
from si_tests.utils.ha_helpers import collect_machine_sequence, TargetMachineData, id_label_node, OutageMachineData

# Logger object used by the tests in this module.
LOG = logger.logger

# Shell command the tests pass to Machine.exec_pod_cmd() to take a node down.
# "-h +1" schedules the halt one minute in the future instead of immediately
# (presumably so the exec call can return before the host goes away - confirm).
SHUTDOWN_CMD = 'sudo shutdown -h +1'  # shutdown after 1 minute


@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures("check_heat_stacks_after_test")
@pytest.mark.parametrize("target_machine_data", collect_machine_sequence(), ids=id_label_node)
@pytest.mark.usefixtures("check_stacklight_svc_downtime")
def test_ha_child_shutdown_mosk(request, kaas_manager, show_step, target_machine_data: TargetMachineData):
    """Shutdown machine in MOSK cluster

    Scenario:
            1. Soft shut down the node
            2. Wait until k8s node becomes NotReady
            3. Power on node
            4. Check Ceph cluster
            5. Check cluster readiness
            6. Check OpenStack resources

    Precondition:
            - Launch RefApp
            - Launch loadtest
    Postcondition:
            - Check workload downtime

    """
    # NOTE(review): the docstring layout above is presumably consumed by the
    # show_step fixture - confirm before reformatting it.

    # The test is parametrized per machine. Once any test of the session has
    # failed, do not take another node down.
    if request.session.testsfailed:
        pytest.skip("Skip because only one node can be damage")

    managed_ns: Namespace = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    cluster: Cluster = managed_ns.get_cluster(settings.TARGET_CLUSTER)
    # Dump the child kubeconfig to a file and hand that file to OpenStackManager.
    child_kubeconfig_name, child_kubeconfig = cluster.get_kubeconfig_from_secret()
    with open(child_kubeconfig_name, 'w') as f:
        f.write(child_kubeconfig)
    os_manager = OpenStackManager(kubeconfig=child_kubeconfig_name)
    machine: Machine = target_machine_data.machine.machine
    k8s_node_name = machine.get_k8s_node_name()
    # Collect everything needed to track the node *before* it is shut down, so
    # that a failing lookup aborts the test while the node is still untouched.
    machine_ip = machine.public_ip

    show_step(1)
    try:
        machine.exec_pod_cmd(SHUTDOWN_CMD, verbose=False, timeout=10)
    except Exception:
        # Best effort on purpose: the connection is expected to break while the
        # node is going down, the ICMP wait below is the real verification.
        LOG.debug("Current SSH connection closed by timeout since we are performed shutdown")
    LOG.info("Wait until machine is no longer available via ICMP")
    waiters.wait(lambda: not waiters.icmp_ping(machine_ip), interval=5, timeout=1200)

    show_step(2)
    cluster.check.wait_k8s_node_status(k8s_node_name, expected_status='NotReady')

    show_step(3)
    machine.set_baremetalhost_power(online=True)
    LOG.info("Wait until machine is available via ICMP")
    waiters.wait(lambda: waiters.icmp_ping(machine_ip), interval=5, timeout=600)

    LOG.info("Wait until k8s node become Ready")
    cluster.check.wait_k8s_node_status(k8s_node_name, expected_status='Ready')

    show_step(4)
    try:
        health_info = cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError as e:
        # The assertion is only a branch condition: an unhealthy status is not
        # fatal here, it falls back to wait_ceph_health_status(). Log the
        # details first, otherwise the assertion message is lost.
        LOG.info(f"Ceph is not healthy yet, waiting for HEALTH_OK: \n{e}")
        cluster.check.wait_ceph_health_status(timeout=600, interval=30)

    show_step(5)
    cluster.check.check_k8s_nodes(timeout=600)
    # Check/wait for correct docker service replicas in cluster
    ucp_worker_agent_name = cluster.check.get_ucp_worker_agent_name()
    cluster.check.check_actual_expected_docker_services(
        changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
    # NOTE(review): presumably a workaround for issue PRODX-54579 - confirm.
    os_manager.wr_prodx_54579()
    cluster.check.check_k8s_pods()
    cluster.check.check_actual_expected_pods(timeout=3200)
    cluster.check.check_cluster_readiness()

    show_step(6)
    LOG.info("Wait osdpl health status=Ready")
    os_manager.wait_openstackdeployment_health_status(timeout=1800)
    LOG.info("Wait os jobs to success and pods to become Ready")
    os_manager.wait_os_resources(timeout=1800, exclude_node_shutdown=True)


def _shutdown_mgmt_machine(mgmt_cluster: Cluster, target_machine: OutageMachineData) -> None:
    """Softly shut down one MGMT machine and wait until its k8s node is NotReady.

    :param mgmt_cluster: cluster object whose ``check`` helper tracks the node status
    :param target_machine: outage record (name, machine, ip, k8s_node_name) of the machine
    """
    LOG.info(f"Power Off machine: {target_machine.name} (node: {target_machine.k8s_node_name})")
    try:
        target_machine.machine.exec_pod_cmd(SHUTDOWN_CMD, verbose=False, timeout=10)
    except Exception:
        # Best effort on purpose: the connection is expected to break while the
        # machine is going down, the ICMP wait below is the real verification.
        LOG.debug("Current SSH connection closed by timeout since we are performed shutdown")
    LOG.info(f"Wait until machine {target_machine.name} is no longer available via ICMP")
    waiters.wait(lambda: not waiters.icmp_ping(target_machine.ip), interval=5, timeout=1200)
    LOG.info(f"Wait until k8s node {target_machine.k8s_node_name} become NotReady")
    mgmt_cluster.check.wait_k8s_node_status(target_machine.k8s_node_name, expected_status='NotReady', timeout=1800)


def _power_on_mgmt_machines(mgmt_cluster: Cluster, target_machines: list) -> None:
    """Power on every given MGMT machine and wait until its k8s node is Ready.

    A failure on one machine does not stop the recovery of the remaining ones.

    :param mgmt_cluster: cluster object whose ``check`` helper tracks the node status
    :param target_machines: list of OutageMachineData records to bring back
    :raises Exception: the first error hit while restoring a machine, re-raised
        after all machines have been processed
    """
    first_error = None
    for target_machine in target_machines:
        try:
            LOG.info(f"Power On machine: {target_machine.name} (node: {target_machine.k8s_node_name})")
            target_machine.machine.set_baremetalhost_power(online=True)
            LOG.info(f"Wait until machine {target_machine.name} is available via ICMP")
            waiters.wait(lambda: waiters.icmp_ping(target_machine.ip), interval=5, timeout=600)
            LOG.info(f"Wait until k8s node {target_machine.k8s_node_name} become Ready")
            mgmt_cluster.check.wait_k8s_node_status(
                target_machine.k8s_node_name, expected_status='Ready', timeout=1800)
        except Exception as e:
            LOG.error(f"Failed to power on machine {target_machine.name}: \n{e}")
            if first_error is None:
                first_error = e
    if first_error is not None:
        raise first_error


@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures("check_heat_stacks_after_test")
@pytest.mark.usefixtures('log_step_time')
@pytest.mark.usefixtures("check_stacklight_svc_downtime")
def test_ha_mgmt_sequential_shutdown(kaas_manager, show_step):
    """Sequential shutdown machine in MGMT cluster to check child

    Scenario:
            1. Sequential shutdown all machines in MGMT cluster (N-2)
            2. Check child cluster readiness
            3. Check child Ceph cluster
            4. Check child OpenStack resources
            5. Power On MGMT machines
            6. Check MGMT cluster readiness

    Precondition:
            - Launch RefApp
            - Launch loadtest
    Postcondition:
            - Check workload downtime

    """
    # NOTE(review): the docstring layout above is presumably consumed by the
    # show_step fixture - confirm before reformatting it.

    managed_ns: Namespace = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    child_cluster: Cluster = managed_ns.get_cluster(settings.TARGET_CLUSTER)
    mgmt_cluster: Cluster = child_cluster.get_parent_cluster()
    # Dump the child kubeconfig to a file and hand that file to OpenStackManager.
    child_kubeconfig_name, child_kubeconfig = child_cluster.get_kubeconfig_from_secret()
    with open(child_kubeconfig_name, 'w') as f:
        f.write(child_kubeconfig)
    os_manager = OpenStackManager(kubeconfig=child_kubeconfig_name)
    all_machines: list[Machine] = mgmt_cluster.get_machines()

    # Randomize which machines stay alive (shuffles the list in place).
    random.shuffle(all_machines)
    current_keepalive = mgmt_cluster.get_keepalive_master_machine()
    # Targets: every non-keepalive machine except the first two of the shuffled
    # list (those two stay up), followed by the keepalive master, which is
    # therefore shut down last.
    machines = [m for m in all_machines if m.name != current_keepalive.name][2:] + [current_keepalive]

    # Snapshot name/ip/node name while every machine is still up.
    target_machines: list[OutageMachineData] = [
        OutageMachineData(
            name=machine.name,
            machine=machine,
            ip=machine.public_ip,
            k8s_node_name=machine.get_k8s_node_name())
        for machine in machines
    ]

    # Machines for which a shutdown has been attempted and which therefore
    # must be powered on again, whatever happens afterwards.
    attempted_machines: list[OutageMachineData] = []

    try:
        show_step(1)
        for target_machine in target_machines:
            # Register before touching the machine: if the shutdown sequence
            # fails half-way, this machine may already be down.
            attempted_machines.append(target_machine)
            _shutdown_mgmt_machine(mgmt_cluster, target_machine)

        try:
            LOG.info(f"Check child cluster: {child_cluster.name}")

            show_step(2)
            child_cluster.check.check_k8s_nodes(timeout=1800)
            # Check/wait for correct docker service replicas in cluster
            ucp_worker_agent_name = child_cluster.check.get_ucp_worker_agent_name()
            child_cluster.check.check_actual_expected_docker_services(
                changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
            # NOTE(review): presumably a workaround for issue PRODX-54579 - confirm.
            os_manager.wr_prodx_54579()
            child_cluster.check.check_k8s_pods()
            child_cluster.check.check_cluster_readiness()

            show_step(3)
            try:
                health_info = child_cluster.check.get_ceph_health_detail()
                assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                             f'Current ceph health status: {health_info}'
            except AssertionError as e:
                # The assertion is only a branch condition: an unhealthy status
                # falls back to wait_ceph_health_status(). Log the details
                # first, otherwise the assertion message is lost.
                LOG.info(f"Ceph is not healthy yet, waiting for HEALTH_OK: \n{e}")
                child_cluster.check.wait_ceph_health_status(timeout=1800, interval=30)

            show_step(4)
            LOG.info("Wait osdpl health status=Ready")
            os_manager.wait_openstackdeployment_health_status(timeout=1800)
            LOG.info("Wait os jobs to success and pods to become Ready")
            os_manager.wait_os_resources(timeout=1800, exclude_node_shutdown=True)

        except Exception as e:
            LOG.error(f"The following exception occurred during child testing: \n{e}")
            raise
    finally:
        # Always bring the MGMT machines back, even when the shutdown sequence
        # or the child checks failed.
        show_step(5)
        _power_on_mgmt_machines(mgmt_cluster, attempted_machines)

    show_step(6)
    mgmt_cluster.check.check_k8s_nodes(timeout=600)
    # Check/wait for correct docker service replicas in cluster
    ucp_worker_agent_name = mgmt_cluster.check.get_ucp_worker_agent_name()
    mgmt_cluster.check.check_actual_expected_docker_services(
        changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
    mgmt_cluster.check.check_k8s_pods()
    mgmt_cluster.check.check_actual_expected_pods(timeout=3200)
    mgmt_cluster.check.check_cluster_readiness()
