import pytest

from si_tests import logger
from si_tests import settings
from si_tests.managers.kaas_manager import Cluster, Machine, Manager  # noqa: F401
from si_tests.managers.openstack_manager import OpenStackManager
from si_tests.utils import waiters
from si_tests.utils import packaging_version as version
from si_tests.utils import update_child_clusterrelease_actions

LOG = logger.logger


def wait_ceph_status(cluster: Cluster, expected_status="HEALTH_OK"):
    """Check once whether Ceph reports the expected health status.

    Fetches the status details through ``cluster.check.get_ceph_status_details()``
    and compares the nested ``health.status`` value with ``expected_status``.
    The outcome is logged in both cases.

    :param cluster: cluster object whose ``check`` helper provides Ceph status
    :param expected_status: health status string to compare against
    :return: True when the current status equals ``expected_status``, else False
    """
    status_details = cluster.check.get_ceph_status_details()
    # A missing 'health' section or 'status' key yields None, which simply
    # does not match the expected status.
    current_status = status_details.get('health', {}).get('status')

    if current_status == expected_status:
        LOG.info(f"Expected Ceph status {current_status} equal to current")
        return True

    LOG.info(f"Expected Ceph status = {expected_status}, but current = {current_status}. Waiting...")
    return False


@pytest.mark.usefixtures("introspect_child_target_objects")
@pytest.mark.usefixtures('create_hoc_before_lcm_and_delete_after')
@pytest.mark.usefixtures('post_action_update_coredns')
def test_replace_disabled_machine_during_upgrade(kaas_manager: Manager, show_step):
    """Replace a BM 'disabled' worker Machine during Child cluster upgrade

    Scenario:
        1. Find disabled machine
        2. Replace the disabled Machine and it's BMH
        3. Move SL and Ceph roles to the new Machine
        4. Complete the interrupted upgrade process
        5. Wait for rebalance of Ceph cluster
        6. Delete all Pending openstack pvc for pods (optional)
        7. Check OpenStack readiness (optional)
        8. Check cluster readiness
    """

    # Resolve the target namespace and cluster from the test settings.
    target_cluster_name = settings.TARGET_CLUSTER
    target_namespace_name = settings.TARGET_NAMESPACE

    namespace = kaas_manager.get_namespace(target_namespace_name)
    LOG.info("Namespace name - %s", target_namespace_name)

    cluster = namespace.get_cluster(target_cluster_name)
    LOG.info("Cluster name - %s", target_cluster_name)
    update_actions = update_child_clusterrelease_actions.UpdateChildClusterreleaseActions(cluster)

    show_step(1)
    # Disable the candidate machine if nobody did it yet, then fetch it again
    # and make sure it is really reported as disabled.
    candidate = cluster.day2operations.get_machine_to_disable()
    if not candidate.is_disabled():
        cluster.day2operations.disable_machine(candidate)
    disabled_machine = cluster.day2operations.get_machine_to_disable()
    assert disabled_machine.is_disabled(), f"Machine '{disabled_machine.name}' is not disabled"
    # Remember the identifiers before the machine is replaced below.
    disabled_machine_name = disabled_machine.name
    disabled_k8s_node_name = disabled_machine.get_k8s_node_name()

    show_step(2)
    LOG.banner(f"Create a new BMH and Machine instead of disabled Machine '{disabled_machine.name}'")
    replacement_machine = cluster.day2operations.replace_baremetal_machine_and_bmh(
        disabled_machine,
        machine_deletion_policy="unsafe",
    )

    show_step(3)
    # Try to un-bound Stacklight pods from disabled nodes
    cluster.day2operations.disable_stacklight_for_machine(disabled_machine_name, disabled_k8s_node_name)
    # Move Ceph roles to the new Machine
    cluster.day2operations.disable_ceph_for_machine(
        disabled_machine_name,
        disabled_k8s_node_name,
        [replacement_machine],
    )
    # Cleanup pods and pvc from stuck machines
    cluster.day2operations.cleanup_pods_and_pvc_from_k8s_node(disabled_k8s_node_name)

    show_step(4)
    cluster.check.check_update_finished(timeout=settings.KAAS_CHILD_CLUSTER_UPDATE_TIMEOUT, interval=120)
    LOG.banner("Cluster update is completed", sep="#")

    show_step(5)
    if cluster.is_child:
        LOG.info("Ceph cluster re-balancing may take some time. Wait timeout is 1h.")
        # Waiting for actual Ceph status from Ceph tools pod
        if settings.CEPH_EXEC_CRASH_ARCHIVE_ALL:
            # exec 'ceph crash archive-all' to wipe info about last crash warnings.
            cluster.check.exec_ceph_tools_command(
                'ceph crash archive-all',
                return_json=False,
                raise_on_fail=True,
            )
        LOG.info("Wait Ceph HEALTH_OK status in Ceph tools")
        waiters.wait(lambda: wait_ceph_status(cluster), timeout=3600, interval=30)
        # Wait until KaaS update Cluster kind with Ceph status
        LOG.info("Wait Ceph HEALTH_OK status in cluster object")
        try:
            ceph_health = cluster.check.get_ceph_health_detail()
            assert ceph_health['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                         f'Current ceph health status: {ceph_health}'
        except AssertionError:
            # The cluster object is not HEALTH_OK yet: fall back to waiting for it.
            cluster.check.wait_ceph_health_status(timeout=600, interval=30)

    # OpenStack-related steps run only for MOSK releases with OpenStack deployed.
    is_mosk_release = cluster.clusterrelease_version.startswith(settings.MOSK_RELEASE_PREFIX)
    if is_mosk_release and cluster.is_os_deployed():
        # Only the kubeconfig content is needed; it is stored in a local file
        # which is then passed to OpenStackManager.
        _, child_kubeconfig = cluster.get_kubeconfig_from_secret()
        with open('child_conf', 'w') as kubeconfig_file:
            kubeconfig_file.write(child_kubeconfig)
        os_manager = OpenStackManager(kubeconfig='child_conf')

        show_step(6)  # Delete all Pending pvc for pods
        cluster.delete_pending_openstack_pods()

        show_step(7)
        LOG.info("Wait osdpl health status=Ready")
        os_manager.wait_openstackdeployment_health_status(timeout=1800)

        LOG.info("Wait os jobs to success and pods to become Ready")
        os_manager.wait_os_resources(timeout=1800)
        update_actions.reconfigure_coredns_for_mosk()

    show_step(8)
    LOG.banner("Check Cluster conditions readiness")
    cluster.check.check_cluster_readiness()
    cluster.check.check_machines_status(timeout=300)
    cluster.check.check_cluster_nodes()
    cluster.check.check_k8s_nodes()
    # Check/wait for correct docker service replicas in cluster
    worker_agent_name = cluster.check.get_ucp_worker_agent_name()
    cluster._refresh_expected_objects()  # required after changing cluster Machines count
    cluster.check.check_actual_expected_docker_services(
        changed_after_upd={'ucp-worker-agent-x': worker_agent_name},
    )
    cluster.check.check_k8s_pods()
    cluster.check.check_actual_expected_pods(timeout=3200)
    cluster.check.check_deploy_stage_success()

    cluster.check.check_bmh_inventory_presense()


@pytest.mark.usefixtures("introspect_child_target_objects")
@pytest.mark.usefixtures('create_hoc_before_lcm_and_delete_after')
def test_replace_disabled_machine_during_reboot(kaas_manager: Manager, show_step):
    """Replace BM 'disabled' worker Machine during Child cluster graceful reboot

    Scenario:
        1. Find disabled machine
        2. Replace the disabled Machine and it's BMH
        3. Move SL and Ceph roles to the new Machine
        4. Ensure that reboot request is completed
        5. Wait for rebalance of Ceph cluster
        6. Delete all Pending openstack pvc for pods (optional)
        7. Check OpenStack readiness (optional)
        8. Check cluster readiness
    """
    # NOTE: the scenario numbers above must stay in sync with the
    # show_step(N) calls below (steps 1..8).

    cluster_name = settings.TARGET_CLUSTER
    namespace_name = settings.TARGET_NAMESPACE

    ns = kaas_manager.get_namespace(namespace_name)
    LOG.info("Namespace name - %s", namespace_name)

    cluster = ns.get_cluster(cluster_name)
    LOG.info("Cluster name - %s", cluster_name)

    show_step(1)
    # Disable the candidate machine if it is not disabled yet, then fetch it
    # again and verify that it is reported as disabled.
    machine = cluster.day2operations.get_machine_to_disable()
    if not machine.is_disabled():
        cluster.day2operations.disable_machine(machine)
    machine = cluster.day2operations.get_machine_to_disable()
    assert machine.is_disabled(), f"Machine '{machine.name}' is not disabled"
    # Remember the identifiers before the machine is replaced below.
    disabled_machine_name = machine.name
    disabled_k8s_node_name = machine.get_k8s_node_name()

    show_step(2)
    LOG.banner(f"Create a new BMH and Machine instead of disabled Machine '{machine.name}'")
    new_machine = cluster.day2operations.replace_baremetal_machine_and_bmh(machine, machine_deletion_policy="unsafe")

    show_step(3)
    # Try to unbound Stacklight pods from disabled nodes
    cluster.day2operations.disable_stacklight_for_machine(disabled_machine_name, disabled_k8s_node_name)
    # Move Ceph roles to the new Machine
    cluster.day2operations.disable_ceph_for_machine(disabled_machine_name, disabled_k8s_node_name, [new_machine])
    # Cleanup pods and pvc from stuck machines
    cluster.day2operations.cleanup_pods_and_pvc_from_k8s_node(disabled_k8s_node_name)

    show_step(4)
    LOG.info(f"Check cluster {cluster.namespace}/{cluster.name} reboot request is completed")
    cluster.check.wait_graceful_reboot_request(expected_status=False, timeout=600)

    show_step(5)
    if cluster.is_child:
        LOG.info("Ceph cluster re-balancing may take some time. Wait timeout is 1h.")
        # Waiting for actual Ceph status from Ceph tools pod
        if settings.CEPH_EXEC_CRASH_ARCHIVE_ALL:
            # exec 'ceph crash archive-all' to wipe info about last crash warnings.
            cluster.check.exec_ceph_tools_command('ceph crash archive-all', return_json=False,
                                                  raise_on_fail=True)
        LOG.info("Wait Ceph HEALTH_OK status in Ceph tools")
        waiters.wait(lambda: wait_ceph_status(cluster), timeout=3600, interval=30)
        # Wait until KaaS update Cluster kind with Ceph status
        LOG.info("Wait Ceph HEALTH_OK status in cluster object")
        try:
            health_info = cluster.check.get_ceph_health_detail()
            assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                         f'Current ceph health status: {health_info}'
        except AssertionError:
            # The cluster object is not HEALTH_OK yet: fall back to waiting for it.
            cluster.check.wait_ceph_health_status(timeout=600, interval=30)

    # OpenStack-related steps run only for MOSK releases with OpenStack deployed.
    if cluster.clusterrelease_version.startswith(settings.MOSK_RELEASE_PREFIX) \
            and cluster.is_os_deployed():
        # Only the kubeconfig content is used; it is stored in a local file
        # which is then passed to OpenStackManager.
        _, child_kubeconfig = cluster.get_kubeconfig_from_secret()
        with open('child_conf', 'w') as f:
            f.write(child_kubeconfig)
        os_manager = OpenStackManager(kubeconfig='child_conf')
        # TODO(tleontovich) Delete if statement after 2-28
        if version.parse(cluster.clusterrelease_version) < version.parse('mosk-17-1-0-rc-24-1'):
            show_step(6)  # Delete all Pending pvc for pods
            cluster.delete_pending_openstack_pods()

        show_step(7)
        LOG.info("Wait osdpl health status=Ready")
        os_manager.wait_openstackdeployment_health_status(timeout=1800)
        LOG.info("Wait os jobs to success and pods to become Ready")
        os_manager.wait_os_resources(timeout=1800)

    show_step(8)
    LOG.banner("Check Cluster conditions readiness")
    cluster.check.check_cluster_readiness()
    cluster.check.check_machines_status(timeout=300)
    cluster.check.check_cluster_nodes()
    cluster.check.check_k8s_nodes()
    # Check/wait for correct docker service replicas in cluster
    ucp_worker_agent_name = cluster.check.get_ucp_worker_agent_name()
    cluster._refresh_expected_objects()  # required after changing cluster Machines count
    cluster.check.check_actual_expected_docker_services(
        changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
    cluster.check.check_k8s_pods()
    cluster.check.check_actual_expected_pods(timeout=3200)
    cluster.check.check_deploy_stage_success()

    cluster.check.check_bmh_inventory_presense()
