#    Copyright 2022 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import pytest
from exec_helpers import ExecHelperTimeoutError

from si_tests import logger
from si_tests import settings
from si_tests.managers.kaas_manager import Machine, Cluster, Namespace
from si_tests.managers.openstack_manager import OpenStackManager
from si_tests.utils import waiters
from si_tests.utils.ha_helpers import collect_machine_sequence, TargetMachineData, id_label_node, OutageMachineData

# Module-level logger used by the tests below.
LOG = logger.logger

# Command executed on the target machine to trigger a soft reboot through systemd.
# NOTE(review): the link below documents the kernel SysRq interface, which
# `systemctl reboot` does not use; it looks like a leftover from a SysRq-based
# variant of this command - confirm and drop it if stale.
# describe https://www.kernel.org/doc/html/v4.15/admin-guide/sysrq.html
REBOOT_CMD = 'sudo systemctl reboot'


@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures("check_heat_stacks_after_test")
@pytest.mark.usefixtures('log_step_time')
@pytest.mark.parametrize("target_machine_data", collect_machine_sequence(), ids=id_label_node)
@pytest.mark.usefixtures("check_stacklight_svc_downtime")
def test_ha_child_reboot_mosk(request, kaas_manager, show_step, target_machine_data: TargetMachineData):
    """Soft reboot machine in child MOSK cluster

    Parametrized over the entries produced by collect_machine_sequence();
    every run reboots one machine and then verifies that the cluster,
    Ceph and OpenStack get back to a healthy state.

    Scenario:
            1. Soft reboot the node
            2. Wait for k8s to reboot
            3. Check cluster readiness
            4. Check Ceph cluster
            5. Check OpenStack resources

    Precondition:
            - Launch RefApp
            - Launch loadtest
    Postcondition:
            - Check workload downtime

    """
    # Once any earlier test in the session has failed, do not take another node down.
    if request.session.testsfailed:
        pytest.skip("Skip because only one node can be damage")

    managed_ns: Namespace = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    cluster: Cluster = managed_ns.get_cluster(settings.TARGET_CLUSTER)
    machine: Machine = target_machine_data.machine.machine
    # Resolve the node name and IP before the reboot, while the machine is still reachable.
    k8s_node_name = machine.get_k8s_node_name()
    machine_ip = machine.public_ip

    show_step(1)
    try:
        # The exit code is irrelevant: the SSH session is expected to break while the host goes down.
        machine.run_cmd(REBOOT_CMD, check_exit_code=False, timeout=10)
    except ExecHelperTimeoutError:
        LOG.debug("Current SSH connection closed by timeout since we are performed reboot")

    show_step(2)
    LOG.info("Wait until machine is no longer available via ICMP")
    waiters.wait(lambda: not waiters.icmp_ping(machine_ip), interval=5, timeout=1200)

    LOG.info("Wait until k8s node become NotReady")
    cluster.check.wait_k8s_node_status(k8s_node_name, expected_status='NotReady', timeout=900)

    LOG.info("Wait until machine is available via ICMP")
    waiters.wait(lambda: waiters.icmp_ping(machine_ip), interval=5, timeout=1800)

    LOG.info("Wait until k8s node become Ready")
    cluster.check.wait_k8s_node_status(k8s_node_name, expected_status='Ready', timeout=1800)

    show_step(3)
    cluster.check.check_k8s_nodes(timeout=1800)
    # Check/wait for correct docker service replicas in cluster
    ucp_worker_agent_name = cluster.check.get_ucp_worker_agent_name()
    cluster.check.check_actual_expected_docker_services(
        changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
    cluster.check.check_actual_expected_pods(timeout=3200)
    cluster.check.check_k8s_pods()
    cluster.check.check_cluster_readiness()

    show_step(4)
    health_info = cluster.check.get_ceph_health_detail()
    # An explicit condition instead of assert/except AssertionError: the decision to wait
    # must not disappear when asserts are stripped (python -O), and the observed status
    # is logged instead of being silently discarded.
    if health_info['status'] != "HEALTH_OK":
        LOG.warning(f"Ceph health is not OK yet, waiting for it to recover. "
                    f"Current ceph health status: {health_info}")
        cluster.check.wait_ceph_health_status(timeout=1800, interval=30)

    show_step(5)
    # OpenStackManager is given a kubeconfig path, so dump the child kubeconfig to a file first.
    child_kubeconfig_name, child_kubeconfig = cluster.get_kubeconfig_from_secret()
    with open(child_kubeconfig_name, 'w') as f:
        f.write(child_kubeconfig)
    os_manager = OpenStackManager(kubeconfig=child_kubeconfig_name)
    LOG.info("Wait osdpl health status=Ready")
    os_manager.wait_openstackdeployment_health_status(timeout=1800)
    LOG.info("Wait os jobs to success and pods to become Ready")
    os_manager.wait_os_resources(timeout=1800)


@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures("check_heat_stacks_after_test")
@pytest.mark.usefixtures('log_step_time')
@pytest.mark.usefixtures("check_stacklight_svc_downtime")
def test_ha_child_sequential_reboot(kaas_manager, show_step):
    """Soft reboot all machines in child MOSK cluster one after another

    Every machine of the target cluster is rebooted in turn (the next one
    is rebooted as soon as the previous one stops answering ICMP); only
    after that the test waits for all of them to come back and verifies
    that the cluster, Ceph and OpenStack get back to a healthy state.

    Scenario:
            1. Soft reboot all nodes
            2. Wait for k8s to reboot
            3. Check cluster readiness
            4. Check Ceph cluster
            5. Check OpenStack resources

    Precondition:
            - Launch RefApp
            - Launch loadtest
    Postcondition:
            - Check workload downtime

    """

    managed_ns: Namespace = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    cluster: Cluster = managed_ns.get_cluster(settings.TARGET_CLUSTER)
    machines: list[Machine] = cluster.get_machines()
    # Collect names, IPs and node names up front, while every machine is still reachable.
    target_machines: list[OutageMachineData] = [
        OutageMachineData(
            name=machine.name,
            machine=machine,
            ip=machine.public_ip,
            k8s_node_name=machine.get_k8s_node_name())
        for machine in machines
    ]

    show_step(1)
    for target_machine in target_machines:
        LOG.info(f"Reboot machine: {target_machine.name} (node: {target_machine.k8s_node_name})")
        try:
            # The exit code is irrelevant: the SSH session is expected to break while the host goes down.
            target_machine.machine.run_cmd(REBOOT_CMD, check_exit_code=False, timeout=10)
        except ExecHelperTimeoutError:
            LOG.debug("Current SSH connection closed by timeout since we are performed reboot")

        LOG.info(f"Wait until machine {target_machine.name} is no longer available via ICMP")
        # Bind the IP as a default argument so the predicate does not depend on the loop variable.
        waiters.wait(lambda ip=target_machine.ip: not waiters.icmp_ping(ip), interval=5, timeout=1200)

    show_step(2)
    for target_machine in target_machines:
        LOG.info(f"Wait until machine {target_machine.name} is available via ICMP")
        waiters.wait(lambda ip=target_machine.ip: waiters.icmp_ping(ip), interval=5, timeout=1800)
        LOG.info(f"Wait until k8s node {target_machine.k8s_node_name} become Ready")
        cluster.check.wait_k8s_node_status(target_machine.k8s_node_name, expected_status='Ready', timeout=1800)

    show_step(3)
    cluster.check.check_k8s_nodes(timeout=1800)
    # Check/wait for correct docker service replicas in cluster
    ucp_worker_agent_name = cluster.check.get_ucp_worker_agent_name()
    cluster.check.check_actual_expected_docker_services(
        changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
    cluster.check.check_actual_expected_pods(timeout=3200)
    cluster.check.check_k8s_pods()
    cluster.check.check_cluster_readiness()

    show_step(4)
    health_info = cluster.check.get_ceph_health_detail()
    # An explicit condition instead of assert/except AssertionError: the decision to wait
    # must not disappear when asserts are stripped (python -O), and the observed status
    # is logged instead of being silently discarded.
    if health_info['status'] != "HEALTH_OK":
        LOG.warning(f"Ceph health is not OK yet, waiting for it to recover. "
                    f"Current ceph health status: {health_info}")
        cluster.check.wait_ceph_health_status(timeout=1800, interval=30)

    show_step(5)
    # OpenStackManager is given a kubeconfig path, so dump the child kubeconfig to a file first.
    child_kubeconfig_name, child_kubeconfig = cluster.get_kubeconfig_from_secret()
    with open(child_kubeconfig_name, 'w') as f:
        f.write(child_kubeconfig)
    os_manager = OpenStackManager(kubeconfig=child_kubeconfig_name)
    LOG.info("Wait osdpl health status=Ready")
    os_manager.wait_openstackdeployment_health_status(timeout=1800)
    LOG.info("Wait os jobs to success and pods to become Ready")
    os_manager.wait_os_resources(timeout=1800)
