from collections import Counter
import pytest
from kubernetes.client.rest import ApiException
from exec_helpers import ExecHelperTimeoutError

from si_tests import logger
from si_tests import settings
from si_tests.managers.kaas_manager import Machine, Cluster
from si_tests.managers.ipmi_manager import IpmiManager
from si_tests.utils import waiters, utils
from si_tests.utils.ha_helpers import OutageMachineData

# Shell command passed to ``run_cmd`` by the reboot tests below to soft-reboot a machine.
REBOOT_CMD = 'sudo systemctl reboot'
# Logger shared by the helpers and tests in this module.
LOG = logger.logger


def check_cluster_readiness(cluster: Cluster):
    """Run the readiness checks of ``cluster.check`` in a fixed order.

    The order is: machines status, k8s nodes, k8s pods, actual/expected pods
    and, last, the overall cluster readiness.

    Args:
        cluster: cluster whose ``check`` helper is used
    """
    ordered_checks = (
        'check_machines_status',
        'check_k8s_nodes',
        'check_k8s_pods',
        'check_actual_expected_pods',
        'check_cluster_readiness',
    )
    LOG.info(f"Check cluster '{cluster.name}' readiness")
    for check_name in ordered_checks:
        # Look the checker up right before each call, one check at a time
        getattr(cluster.check, check_name)()
    LOG.info(f"Cluster '{cluster.name}' readiness checked\n")


def check_svc_host(cluster, times=5, failure_threshold=1):
    """Wait until the externally exposed HTTP(S) services answer over TCP.

    Collects the services of ``cluster`` that have an external IP and a port
    whose target port is named ``http`` or ``https``, then polls them with
    ``waiters.tcp_ping`` through ``waiters.wait`` (every 30 seconds, for up to
    ``settings.HA_SVC_TIMEOUT`` seconds) until a polling round has no failed
    service.

    Args:
        cluster: current cluster
        times: number of TCP probes sent to each service in one polling round
        failure_threshold: number of failed probes tolerated per service in
            one round; a service is reported as failed only when its number
            of failed probes is greater than this value

    Returns:
        None. Timeout handling is delegated to ``waiters.wait``, which
        receives the ``timeout_msg`` describing the exceeded downtime.
    """
    status_ok = "OK"
    status_err = "Err"
    web_target_ports = ('http', 'https')

    # service name -> {external ip: port}
    # NOTE(review): only one port is kept per service, so when a service has
    # both 'http' and 'https' target ports the last one listed wins - confirm
    # that checking a single port per service is intended.
    services = {}
    for service in cluster.k8sclient.services.list_all():
        if ip := service.get_external_ip():
            for port in service.get_ports():
                if port.target_port in web_target_ports:
                    services[service.name] = {ip: port.port}
    LOG.info("Check services:\n{}".format("\n".join(services)))

    def _check_svc():
        """Probe every collected service once; return True when none failed."""
        failed_svc = []
        for service_name, service_url in services.items():
            # Each entry holds exactly one {ip: port} pair
            (host, port), = service_url.items()
            result = [status_ok if waiters.tcp_ping(host, port) else status_err
                      for _ in range(times)]
            LOG.debug("Connect to {}:{} Result: {}".format(host, port, ", ".join(result)))
            # Counter yields 0 for a missing key, so no membership test is needed
            if Counter(result)[status_err] > failure_threshold:
                failed_svc.append(service_name)

        if failed_svc:
            LOG.info(f"The following services are not available over tcp: {failed_svc}")
        return not failed_svc

    # TODO decrease interval and timeout after implement https://mirantis.jira.com/browse/PRODX-28494
    waiters.wait(_check_svc, interval=30, timeout=settings.HA_SVC_TIMEOUT,
                 timeout_msg=f"Downtime of some services is longer than expected {settings.HA_SVC_TIMEOUT} seconds")


def check_admission_controller(kaas_manager):
    """Verify that the admission webhook rejects an invalid cluster object.

    A temporary namespace is created and a cluster that references randomly
    named ("fake") credentials and public key is submitted to it. The check
    passes only when the API call fails with an ``ApiException`` whose body
    reports that the ``validations.kaas.mirantis.com`` admission webhook
    denied the request. The temporary namespace is always deleted afterwards.

    Args:
        kaas_manager: manager used to create the namespace and to read the
            release version and region of the management cluster

    Raises:
        ApiException: the request failed for any reason other than the
            expected webhook denial
        AssertionError: the invalid cluster was accepted, i.e. the admission
            webhook did not deny the request
    """
    rnd_string = utils.gen_random_string(4)
    cluster_name = f'test-ha-cl-{rnd_string}'
    namespace_name = f'test-ha-ns-{rnd_string}'

    # Read the mgmt cluster data before creating anything, so that a failure
    # here cannot leave the temporary namespace behind
    mgmt_cluster = kaas_manager.get_mgmt_cluster()
    release_name = mgmt_cluster.clusterrelease_version
    region = mgmt_cluster.region_name

    LOG.info("Namespace name - %s", namespace_name)
    ns = kaas_manager.create_namespace(namespace_name)
    try:
        ns.create_cluster(
            cluster_name,
            release_name,
            region=region,
            credentials_name=f"fake_creds_{rnd_string}",
            provider="baremetal",
            public_key_name=f"fake_public_key_{rnd_string}")

    except ApiException as e:
        # The body may be None when the exception carries no HTTP response;
        # treat that as "not the expected denial" instead of failing on `in`
        if 'admission webhook \\"validations.kaas.mirantis.com\\" denied the request' in (e.body or ''):
            LOG.info("Admission Controller checked")
        else:
            LOG.error(e)
            raise
    else:
        # No exception means the invalid cluster was accepted, which is
        # exactly the situation this check exists to detect
        raise AssertionError(
            f"Admission Controller did not deny creation of invalid cluster '{cluster_name}'")
    finally:
        ns.delete()
        ns.wait_for_deletion()


@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_method_time')
@pytest.mark.usefixtures('mcc_loadtest_prometheus')
@pytest.mark.usefixtures('mcc_loadtest_grafana')
@pytest.mark.usefixtures('mcc_loadtest_alerta')
@pytest.mark.usefixtures('mcc_loadtest_keystone')
@pytest.mark.usefixtures('mcc_loadtest_kibana')
@pytest.mark.usefixtures('mcc_loadtest_alertmanager')
@pytest.mark.usefixtures('mcc_loadtest_keycloak')
@pytest.mark.usefixtures('create_hoc_before_lcm_and_delete_after')
def test_ha_shutdown_mgmt_vip(kaas_manager, show_step):
    """Shutdown of VIP nodes in mgmt cluster.
    Check on each unique node where VIP migrates.

    Scenario:
            1. Shutdown node that is VIP
            2. Check services external IP availability
            3. Check that leader pods migrate from powered off node
            4. Check admission controller
            5. Power on node
            6. Check that VIP migrated
            7. Check mgmt cluster

    """
    mgmt_cluster: Cluster = kaas_manager.get_mgmt_cluster()
    control_machines: list[Machine] = mgmt_cluster.get_machines(machine_type='control')
    # Names of the machines that already held the VIP and were powered off
    checked_machines = []

    # At most one round per control machine; the loop stops earlier as soon
    # as the VIP lands on a machine that was already exercised
    for _ in control_machines:
        current_keepalive = mgmt_cluster.get_keepalive_master_machine()
        current_keepalive_k8s_node_name = current_keepalive.get_k8s_node_name()

        if current_keepalive.name in checked_machines:
            LOG.info("Keepalive migrated to already checked machine")
            break
        LOG.info(f"Keepalive assigned to a not checked machine {current_keepalive.name}")
        checked_machines.append(current_keepalive.name)

        machine_ip = current_keepalive.public_ip or current_keepalive.internal_ip

        show_step(1)
        LOG.banner(f'Going to shutdown VIP nodes {current_keepalive_k8s_node_name}')
        current_keepalive.set_baremetalhost_power(online=False)

        LOG.banner("Wait until machine is no longer available via ICMP")
        waiters.wait(lambda: not waiters.icmp_ping(machine_ip), interval=5, timeout=600)

        LOG.info("Wait until k8s node status to be Not Ready")
        mgmt_cluster.check.wait_k8s_node_status(current_keepalive_k8s_node_name, expected_status='NotReady',
                                                timeout=1800)

        show_step(2)
        LOG.banner('Checking SVC host availability')
        check_svc_host(mgmt_cluster)

        show_step(3)
        LOG.banner('Checking VIP is migrated')
        mgmt_cluster.check.wait_leader_migrate_from_node(node_name=current_keepalive_k8s_node_name)

        show_step(4)
        LOG.banner('Checking admission controller')
        check_admission_controller(kaas_manager)

        show_step(5)
        LOG.banner(f'Power on {current_keepalive.name}')
        current_keepalive.set_baremetalhost_power(online=True)

        LOG.banner("Wait until machine is available via ICMP")
        waiters.wait(lambda: waiters.icmp_ping(machine_ip), interval=5, timeout=600)

        LOG.banner("Wait until k8s node status to be Ready")
        mgmt_cluster.check.wait_k8s_node_status(current_keepalive_k8s_node_name, expected_status='Ready',
                                                timeout=1800)

        show_step(6)
        new_keepalive = mgmt_cluster.get_keepalive_master_machine()
        # Compare by machine name (the same key used for `checked_machines`).
        # The two objects come from separate calls, so an identity check may
        # succeed even when the VIP is still on the same machine.
        assert new_keepalive.name != current_keepalive.name, "Keepalive machine has not been changed"

        show_step(7)
        check_cluster_readiness(mgmt_cluster)


@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_step_time')
@pytest.mark.usefixtures('mcc_loadtest_prometheus')
@pytest.mark.usefixtures('mcc_loadtest_grafana')
@pytest.mark.usefixtures('mcc_loadtest_alerta')
@pytest.mark.usefixtures('mcc_loadtest_keystone')
@pytest.mark.usefixtures('mcc_loadtest_kibana')
@pytest.mark.usefixtures('mcc_loadtest_alertmanager')
@pytest.mark.usefixtures('mcc_loadtest_keycloak')
@pytest.mark.usefixtures('create_hoc_before_lcm_and_delete_after')
def test_ha_mgmt_reboot_all_machines(kaas_manager, show_step):
    """Soft reboot machine in MGMT cluster to check child

    Scenario:
            1. Reboot all nodes in MGMT cluster
            2. Wait for MGMT k8s to reboot
            3. Wait for MGMT Keycloak API
            4. Check MGMT cluster readiness

    Precondition:
            - Launch loadtest
    Postcondition:
            - Check workload downtime

    """

    mgmt_cluster: Cluster = kaas_manager.get_mgmt_cluster()
    machines: list[Machine] = mgmt_cluster.get_machines()
    keycloak_ip = kaas_manager.get_keycloak_ip()
    # Snapshot name / ip / node name of every machine before any of them is rebooted
    outage_targets: list[OutageMachineData] = [
        OutageMachineData(
            name=mgmt_machine.name,
            machine=mgmt_machine,
            ip=mgmt_machine.public_ip,
            k8s_node_name=mgmt_machine.get_k8s_node_name())
        for mgmt_machine in machines
    ]

    show_step(1)
    node_names = [outage.k8s_node_name for outage in outage_targets]
    LOG.banner(f"Reboot all nodes in {node_names} with cmd : {REBOOT_CMD}")
    for outage in outage_targets:
        LOG.info(f"Reboot machine: {outage.name} (node: {outage.k8s_node_name})")
        try:
            outage.machine.run_cmd(REBOOT_CMD, check_exit_code=False, timeout=10)
        except ExecHelperTimeoutError:
            # The reboot drops the SSH session, so a timeout here is expected
            LOG.debug("Current SSH connection closed by timeout since we are performed reboot")

        LOG.info(f"Wait until machine {outage.name} is no longer available via ICMP")
        waiters.wait(lambda: not waiters.icmp_ping(outage.ip), interval=5, timeout=1200)

    show_step(2)
    # First wait for every machine to answer ICMP again ...
    for outage in outage_targets:
        LOG.banner(f"Wait until machine {outage.name} is available via ICMP")
        waiters.wait(lambda: waiters.icmp_ping(outage.ip), interval=5, timeout=1800)

    # ... and only then for every k8s node to report Ready
    for outage in outage_targets:
        LOG.info(f"Wait until k8s node {outage.k8s_node_name} become Ready")
        mgmt_cluster.check.wait_k8s_node_status(
            outage.k8s_node_name,
            expected_status='Ready',
            timeout=1800)

    show_step(3)
    LOG.banner("Wait until Keycloak API available")
    waiters.wait(
        lambda: waiters.tcp_ping(keycloak_ip, port=443, timeout=3),
        interval=5,
        timeout=1200)

    show_step(4)
    LOG.banner("Check mgmt health")
    mgmt_cluster.check.check_k8s_nodes(timeout=1800)
    mgmt_cluster.check.check_cluster_nodes()
    # Check/wait for correct docker service replicas in cluster
    worker_agent_name = mgmt_cluster.check.get_ucp_worker_agent_name()
    expected_service_changes = {'ucp-worker-agent-x': worker_agent_name}
    mgmt_cluster.check.check_actual_expected_docker_services(changed_after_upd=expected_service_changes)
    mgmt_cluster.check.check_actual_expected_pods(timeout=3200)
    mgmt_cluster.check.check_k8s_pods()
    mgmt_cluster.check.check_cluster_readiness()
    mgmt_cluster.check.check_diagnostic_cluster_status()


@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_step_time')
@pytest.mark.usefixtures('mcc_loadtest_prometheus')
@pytest.mark.usefixtures('mcc_loadtest_grafana')
@pytest.mark.usefixtures('mcc_loadtest_alerta')
@pytest.mark.usefixtures('mcc_loadtest_keystone')
@pytest.mark.usefixtures('mcc_loadtest_kibana')
@pytest.mark.usefixtures('mcc_loadtest_alertmanager')
@pytest.mark.usefixtures('mcc_loadtest_keycloak')
@pytest.mark.usefixtures('create_hoc_before_lcm_and_delete_after')
def test_ha_mgmt_sequence_reboot_machines(kaas_manager):
    """Reboot(soft) machines in MGMT cluster one by one

    Scenario:
            1. Reboot master-x node in MGMT cluster
            2. Wait for MGMT k8s to reboot
            3. Wait until cluster becomes Ready
            4. Check MGMT cluster readiness, repeat for each node
    Precondition:
            - Launch loadtest
    Postcondition:
            - Check workload downtime

    """

    mgmt_cluster: Cluster = kaas_manager.get_mgmt_cluster()
    machines: list[Machine] = mgmt_cluster.get_machines()
    keycloak_ip = kaas_manager.get_keycloak_ip()
    target_machines: list[OutageMachineData] = []

    for machine in machines:
        target_machines.append(OutageMachineData(
            name=machine.name,
            machine=machine,
            ip=machine.public_ip,
            k8s_node_name=machine.get_k8s_node_name()))

    for target_machine in target_machines:
        LOG.info(f"Reboot machine: {target_machine.name} node:"
                 f" {target_machine.k8s_node_name} with cmd : {REBOOT_CMD}")

        # Only the reboot command itself is allowed to time out (the reboot
        # drops the SSH session). The waits and health checks below must run
        # for every machine, and a timeout raised by them must not be hidden,
        # otherwise the next machine would be rebooted without any verification.
        try:
            target_machine.machine.run_cmd(REBOOT_CMD, check_exit_code=False, timeout=10)
        except ExecHelperTimeoutError:
            LOG.debug("Current SSH connection closed by timeout since we are performed reboot")

        LOG.banner(f"Wait until machine {target_machine.name} is no longer available via ICMP")
        waiters.wait(lambda: not waiters.icmp_ping(target_machine.ip), interval=5, timeout=1200)
        LOG.banner(f"Wait until machine {target_machine.name} is available via ICMP")
        waiters.wait(lambda: waiters.icmp_ping(target_machine.ip), interval=5, timeout=1800)
        LOG.info(f"Wait until k8s node {target_machine.k8s_node_name} become Ready")
        mgmt_cluster.check.wait_k8s_node_status(target_machine.k8s_node_name, expected_status='Ready', timeout=1800)
        LOG.banner("Wait until Keycloak API available")
        waiters.wait(lambda: waiters.tcp_ping(keycloak_ip, port=443, timeout=3), interval=5, timeout=1200)

        # The cluster has to be healthy again before the next machine is rebooted
        LOG.banner("Check mgmt health")
        mgmt_cluster.check.check_k8s_nodes(timeout=1800)
        mgmt_cluster.check.check_cluster_nodes()
        # Check/wait for correct docker service replicas in cluster
        ucp_worker_agent_name = mgmt_cluster.check.get_ucp_worker_agent_name()
        mgmt_cluster.check.check_actual_expected_docker_services(
            changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
        mgmt_cluster.check.check_actual_expected_pods(timeout=3200)
        mgmt_cluster.check.check_k8s_pods()
        mgmt_cluster.check.check_cluster_readiness()
        mgmt_cluster.check.check_diagnostic_cluster_status()

@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_method_time')
@pytest.mark.usefixtures('mcc_loadtest_prometheus')
@pytest.mark.usefixtures('mcc_loadtest_grafana')
@pytest.mark.usefixtures('mcc_loadtest_alerta')
@pytest.mark.usefixtures('mcc_loadtest_keystone')
@pytest.mark.usefixtures('mcc_loadtest_kibana')
@pytest.mark.usefixtures('mcc_loadtest_alertmanager')
@pytest.mark.usefixtures('mcc_loadtest_keycloak')
@pytest.mark.usefixtures('create_hoc_before_lcm_and_delete_after')
def test_ha_shutdown_mgmt_all_nodes_at_once(kaas_manager, show_step):
    """Power off all nodes in mgmt cluster.
    Scenario:
            1. Shutdown each node im mgmt cluster
            2. Power on all nodes
            3. Check mgmt cluster

    """
    mgmt_cluster: Cluster = kaas_manager.get_mgmt_cluster()
    machines: list[Machine] = mgmt_cluster.get_machines()
    ns = kaas_manager.get_namespace('default')
    keycloak_ip = kaas_manager.get_keycloak_ip()
    ipmi_client = IpmiManager()

    # BMC data per machine name; items 0, 1 and 2 are passed below as the
    # IPMI host, user and password respectively
    bmc_by_machine_name = {}
    for mgmt_machine in machines:
        bmc_by_machine_name[mgmt_machine.name] = ns.node_bmc_data(mgmt_machine)

    outage_targets: list[OutageMachineData] = [
        OutageMachineData(
            name=mgmt_machine.name,
            machine=mgmt_machine,
            ip=mgmt_machine.public_ip,
            k8s_node_name=mgmt_machine.get_k8s_node_name())
        for mgmt_machine in machines
    ]

    show_step(1)
    for outage in outage_targets:
        LOG.banner(f'Going to shutdown  node {outage.name}')
        bmc = bmc_by_machine_name.get(outage.name)
        ipmi_client.set_power_off(ipmi_host=bmc[0], ipmi_user=bmc[1], ipmi_password=bmc[2])

        LOG.banner("Wait until machine is no longer available via ICMP")
        waiters.wait(lambda: not waiters.icmp_ping(outage.ip), interval=5, timeout=600)

    show_step(2)
    for outage in outage_targets:
        LOG.banner(f'Going to power on node {outage.name}')
        bmc = bmc_by_machine_name.get(outage.name)
        ipmi_client.set_power_on(ipmi_host=bmc[0], ipmi_user=bmc[1], ipmi_password=bmc[2])

        LOG.banner("Wait until machine is available via ICMP")
        waiters.wait(lambda: waiters.icmp_ping(outage.ip), interval=5, timeout=600)

    show_step(3)
    LOG.banner("Wait until Keycloak API available")
    waiters.wait(
        lambda: waiters.tcp_ping(keycloak_ip, port=443, timeout=3),
        interval=5,
        timeout=1200)
    LOG.banner("Check mgmt health")
    mgmt_cluster.check.check_k8s_nodes(timeout=1800)
    mgmt_cluster.check.check_cluster_nodes()
    # Check/wait for correct docker service replicas in cluster
    worker_agent_name = mgmt_cluster.check.get_ucp_worker_agent_name()
    expected_service_changes = {'ucp-worker-agent-x': worker_agent_name}
    mgmt_cluster.check.check_actual_expected_docker_services(changed_after_upd=expected_service_changes)
    mgmt_cluster.check.check_actual_expected_pods(timeout=3200)
    mgmt_cluster.check.check_k8s_pods()
    mgmt_cluster.check.check_cluster_readiness()
    mgmt_cluster.check.check_diagnostic_cluster_status()
    LOG.banner('Checking SVC host availability')
    check_svc_host(mgmt_cluster)
