import pytest
import yaml

from si_tests import settings
from si_tests import logger
from si_tests.utils import waiters
from kubernetes.client.rest import ApiException

# Logger object exposed by the si_tests.logger module, shared by all tests below
LOG = logger.logger

# Names of the target cluster, of its namespace and of the Rook Ceph
# namespace, all read from si_tests settings at import time
cluster_name = settings.TARGET_CLUSTER
namespace_name = settings.TARGET_NAMESPACE
rook_ns = settings.ROOK_CEPH_NS


def get_hyperconverge_params():
    """Load hyperconverge tolerations and resources from the data file.

    Reads the YAML file located at ``settings.CEPH_HC_DATA_PATH`` and picks
    the ``tolerations`` and ``resources`` entries of its top-level
    ``hyperconverge`` section.

    Returns:
        tuple: ``(tolerations, resources)``. An element is ``None`` when the
        corresponding key, the whole ``hyperconverge`` section or the
        document itself is empty or absent.
    """
    with open(settings.CEPH_HC_DATA_PATH, 'r') as data:
        # safe_load() returns None for an empty document; fall back to an
        # empty mapping so the lookups below do not fail on None
        hc = yaml.safe_load(data) or {}
    # A bare "hyperconverge:" key without a value is parsed as None as well
    hyperconverge = hc.get('hyperconverge') or {}
    return hyperconverge.get('tolerations'), hyperconverge.get('resources')


@pytest.mark.usefixtures('log_method_time')
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_ceph_hyperconverge_tolerations(kaas_manager, show_step):
    """Test ceph pods taints and tolerations.

    Scenario:
        1. Gather current ceph data
        2. Check ceph health is OK before proceed
        3. Patch kaascephcluster with mon roles on control nodes and with
           new data for tolerations (mon and mgr pods)
        4. Check pods are spawned,
           placed on corresponding nodes and health is OK
    """

    # Get namespace
    LOG.info(f"Namespace name: {namespace_name}")
    ns = kaas_manager.get_namespace(namespace_name)
    cluster = ns.get_cluster(cluster_name)
    k8sclient = cluster.k8sclient
    mgmt_cluster = kaas_manager.get_mgmt_cluster()
    # The workaround flag selects which ceph object is read and patched:
    # the miraceph one or the kaascephcluster one
    if cluster.workaround.skip_kaascephcluster_usage():
        ceph_crd = cluster.get_miracephcluster()
    else:
        ceph_crd = cluster.get_cephcluster()
    tolerations, _ = get_hyperconverge_params()
    ceph_health_timeout = 900

    # We will add 2 additional monitors. So we need 2 nodes
    control_machines = cluster.get_machines(machine_type='control')[0:2]

    # K8s node names of the selected control machines. Every name is
    # removed from this list once a new mon pod is found on that node
    machines_k8s_nodes_names = [
        machine.get_k8s_node().name for machine in control_machines]

    # Snapshot of mon deployments and mgr pods before the patch
    mon_deploys = [
        deploy for deploy in k8sclient.deployments.list(
            namespace=rook_ns) if 'rook-ceph-mon' in deploy.name]
    mon_deploys_names = [deploy.name for deploy in mon_deploys]
    mgrs_pods = [
        pod for pod in k8sclient.pods.list(
            namespace=rook_ns) if 'rook-ceph-mgr' in pod.name]
    # Old mgr pod name -> node it was running on
    mgrs_node_name_map = {pod.name: pod.node_name for pod in mgrs_pods}

    show_step(1)
    if cluster.workaround.skip_kaascephcluster_usage():
        # Here nodes are a list of {'name': ..., 'roles': [...]} items
        nodes_data = ceph_crd.data.get('spec').get('nodes')
        LOG.info("Removing all existing mgr roles from machines")
        for node in nodes_data:
            if 'mgr' in node.get('roles', []):
                node['roles'].remove('mgr')
        # Will add 2 new nodes (control) to ceph cluster with mon and mgr role
        for machine in control_machines:
            node_name = machine.get_k8s_node_name()
            roles = ['mon', 'mgr']
            LOG.info(f"Adding roles {roles} to machine {node_name}")
            nodes_data.append({'name': node_name, 'roles': roles})
    else:
        # Here nodes are a mapping of machine name -> {'roles': [...]}
        nodes_data = ceph_crd.data.get('spec').get('cephClusterSpec').get('nodes')
        LOG.info("Removing all existing mgr roles from machines")
        for node_spec in nodes_data.values():
            if 'mgr' in node_spec.get('roles', []):
                node_spec['roles'].remove('mgr')
        # Will add 2 new nodes (control) to ceph cluster with mon and mgr role
        for machine in control_machines:
            roles = {'roles': ['mon', 'mgr']}
            m_name = machine.name
            LOG.info(f"Adding roles {roles['roles']} to machine {m_name}")
            nodes_data[m_name] = roles

    show_step(2)
    # Check ceph health
    if cluster.workaround.skip_kaascephcluster_usage():
        health_info = cluster.check.get_miracephhealths_health_status()
    else:
        health_info = cluster.check.get_ceph_health_status()
    assert health_info == "HEALTH_OK", (
        "Health is not OK. Will not proceed. "
        "Current ceph health info: {}".format(health_info))

    show_step(3)
    if cluster.workaround.skip_kaascephcluster_usage():
        patch_data = {'spec': {
            'nodes': nodes_data,
            'hyperconverge': {'tolerations': tolerations},
        }}
        # Patch miraceph with new nodes roles and mon/mgr tolerations
        cluster.patch_ceph_data(data=patch_data, crd=ceph_crd)
    else:
        patch_data = {'spec': {'cephClusterSpec': {
            'nodes': nodes_data,
            'hyperconverge': {'tolerations': tolerations},
        }}}
        # Patch kaascephcluster with new nodes roles and mon/mgr tolerations
        mgmt_cluster.patch_ceph_data(data=patch_data, crd=ceph_crd)

    show_step(4)
    # Wait for mon pods number is increased by 2
    expected_mon_deploys_num = len(mon_deploys) + 2

    def _new_mons_deployed():
        current_mon_deploys = [
            deploy for deploy in k8sclient.deployments.list(namespace=rook_ns)
            if 'rook-ceph-mon' in deploy.name
            and 'canary' not in deploy.name]
        return len(current_mon_deploys) == expected_mon_deploys_num

    LOG.info("Waiting for mons quorum")
    waiters.wait(_new_mons_deployed,
                 timeout=ceph_health_timeout, interval=30)
    LOG.info('Mons quorum found')
    cluster.check.check_k8s_pods(target_namespaces=rook_ns)
    # Mgr pod is required to be deleted for spawning on new node
    # https://mirantis.jira.com/browse/PRODX-12807
    # https://gerrit.mcp.mirantis.com/c/kaas/kaas-docs/+/95733/2/doc/release-notes/source/known-bugs/12807.rst # noqa
    # However in case when operator migrates other pods (e.g. mons),
    # it can be restarted too. So, we will try and except for it has been
    # already restarted
    LOG.info("Attempt to delete mgr pods")
    for mgr_pod in mgrs_pods:
        # Handle the failure per pod: one pod that is already gone must not
        # prevent deletion of the remaining mgr pods
        try:
            mgr_pod.delete()
        except ApiException:
            LOG.info("Pod has already been restarted")

    def _wait_mgr_pods_spawned():
        # True once the number of mgr pods with new names matches the number
        # of mgr pods that existed before the patch
        LOG.info("Waiting for new mgr pods are spawned")
        fresh_mgr_pods = [
            pod for pod in k8sclient.pods.list(namespace=rook_ns)
            if 'rook-ceph-mgr' in pod.name
            and pod.name not in mgrs_node_name_map]
        return len(fresh_mgr_pods) == len(mgrs_pods)
    waiters.wait(_wait_mgr_pods_spawned, timeout=300, interval=30)
    new_mgr_pods = [
        pod for pod in k8sclient.pods.list(
            namespace=rook_ns) if 'rook-ceph-mgr' in pod.name]
    new_mgr_pods_names = [pod.name for pod in new_mgr_pods]
    LOG.info(f"New mgr pods are {new_mgr_pods_names}")
    # Check mgr pods are spawned on correct nodes: a new mgr pod must not
    # run on any node that hosted a mgr pod before the patch
    wrong_pod_node = {}
    for pod in new_mgr_pods:
        pod_node = pod.node_name
        if pod_node in mgrs_node_name_map.values():
            LOG.error(f"Pod {pod.name} is spawned on node {pod_node}. "
                      "But should have spawned on one of control nodes")
            wrong_pod_node[pod.name] = {'current_node': pod_node}

    assert not wrong_pod_node, (
        f"Next manager pods are spawned on wrong node: \n{yaml.dump(wrong_pod_node)}")
    for pod in new_mgr_pods:
        LOG.info(f"New mgr pod {pod.name} is spawned on node {pod.node_name}")
    # Wait for pods up
    cluster.check.check_k8s_pods(target_namespaces=rook_ns)

    # Check ceph health
    LOG.info("Waiting for HEALTH is OK")
    # The assert is used as a trigger only: when health is not OK yet, fall
    # back to waiting for it instead of failing immediately
    try:
        health_info = cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        cluster.check.wait_ceph_health_status()
    cluster.check.check_ceph_pvc()
    new_mon_deploys = [deploy for deploy in k8sclient.deployments.list(
        namespace=rook_ns) if 'rook-ceph-mon' in deploy.name
                       and deploy.name not in mon_deploys_names]
    LOG.info("New mon deploys: {}".format([d.name for d in new_mon_deploys]))
    # List mon pods once and match them against every new deployment by name
    mon_pods = [pod for pod in k8sclient.pods.list(namespace=rook_ns)
                if 'rook-ceph-mon' in pod.name]
    new_mon_pods = []
    for deploy in new_mon_deploys:
        new_mon_pods.extend(
            pod for pod in mon_pods if deploy.name in pod.name)
    LOG.info("New mon pods: {}".format([pod.name for pod in new_mon_pods]))

    # Check that new mons are spawned on control nodes which were added to
    # cluster by tolerations

    LOG.info("Check pods are spawned on corresponding nodes")
    for pod in new_mon_pods:
        node = pod.data['spec']['node_name']
        if node in machines_k8s_nodes_names:
            machines_k8s_nodes_names.remove(node)
            LOG.info("Pod {} spawned on node {}".format(pod.name, node))
        else:
            LOG.warning(
                "New mon pod {} is spawned on node {}. "
                "This node was not added to tolerations!".format(
                    pod.name, node))
    # Any name left in the list is a control node without a new mon pod
    assert not machines_k8s_nodes_names, (
        "No mon pods are spawned on next nodes: {}".format(
            machines_k8s_nodes_names))


@pytest.mark.usefixtures('log_method_time')
def test_ceph_hyperconverge_resourcess(kaas_manager, show_step):
    """Test ceph pods resources assignment.

    Scenario:
        1. Gather current ceph data
        2. Check ceph health is OK before proceed
        3. Patch cluster with new data with pods resources
        4. Check pods are respawned,
           resources are applied and health is OK
        5. Check ceph pvc
    """
    ns = kaas_manager.get_namespace(namespace_name)
    cluster = ns.get_cluster(cluster_name)
    mgmt_cluster = kaas_manager.get_mgmt_cluster()
    # The workaround flag selects which ceph object is read and patched:
    # the miraceph one or the kaascephcluster one
    if cluster.workaround.skip_kaascephcluster_usage():
        ceph_crd = cluster.get_miracephcluster()
        hyperconverge = ceph_crd.data.get('spec').get('hyperconverge', {})
    else:
        ceph_crd = cluster.get_cephcluster()
        hyperconverge = ceph_crd.data.get('spec').get('cephClusterSpec').get('hyperconverge', {})
    k8sclient = cluster.k8sclient
    _, resources = get_hyperconverge_params()
    show_step(1)
    # Keep the existing hyperconverge settings and only set the resources
    hyperconverge.update({"resources": resources})

    show_step(2)
    # Check ceph health
    if cluster.workaround.skip_kaascephcluster_usage():
        health_info = cluster.check.get_miracephhealths_health_status()
    else:
        health_info = cluster.check.get_ceph_health_status()
    assert health_info == "HEALTH_OK", (
        "Health is not OK. Will not proceed. "
        "Current ceph health status: {}".format(health_info))
    # Count number of replicas before patching procedure, also collect deployments
    mon_replicas_before = [r for r in k8sclient.replicasets.list(namespace=rook_ns, name_prefix='rook-ceph-mon')
                           if 'canary' not in r.name and
                           r.ready_replicas == 1]
    mon_replicas_before_num = len(mon_replicas_before)
    mon_deployments = [r for r in k8sclient.deployments.list(namespace=rook_ns, name_prefix='rook-ceph-mon')
                       if 'canary' not in r.name]
    osd_replicas_before = [r for r in k8sclient.replicasets.list(namespace=rook_ns, name_prefix='rook-ceph-osd')
                           if 'prepare' not in r.name and
                           r.ready_replicas == 1]
    osd_replicas_before_num = len(osd_replicas_before)
    osd_deployments = [r for r in k8sclient.deployments.list(namespace=rook_ns, name_prefix='rook-ceph-osd')
                       if 'prepare' not in r.name]
    mgr_replicas_before = [r for r in k8sclient.replicasets.list(namespace=rook_ns, name_prefix='rook-ceph-mgr')
                           if r.ready_replicas == 1]
    mgr_replicas_before_num = len(mgr_replicas_before)
    mgr_deployments = list(k8sclient.deployments.list(namespace=rook_ns, name_prefix='rook-ceph-mgr'))
    show_step(3)
    if cluster.workaround.skip_kaascephcluster_usage():
        patch_data = {'spec': {
            'hyperconverge': hyperconverge,
        }}
        cluster.patch_ceph_data(data=patch_data, crd=ceph_crd)
    else:
        patch_data = {'spec': {'cephClusterSpec': {
            'hyperconverge': hyperconverge,
        }}}
        mgmt_cluster.patch_ceph_data(data=patch_data, crd=ceph_crd)
    cluster.check.check_k8s_pods(target_namespaces=rook_ns)

    replicas_list = mon_replicas_before + osd_replicas_before + mgr_replicas_before
    deployments_list = mon_deployments + osd_deployments + mgr_deployments

    def collect_and_check_ceph_pods(pod_prefix, resource_name, replicas_before_num, key_word=' '):
        """Check resources of one kind of ceph pods after the patch.

        Args:
            pod_prefix: name prefix used to list the pods in the rook
                namespace.
            resource_name: key in the hyperconverge resources data; also
                used as the name of the container to inspect.
            replicas_before_num: number of ready replicas counted before
                the patch.
            key_word: pods whose name contains this substring are skipped.
                With the default single space no pod name is expected to
                match, so nothing is filtered out.

        Returns:
            bool: True when every pod has the expected resources and the
            number of such pods equals ``replicas_before_num``.
        """
        # NOTE(review): presumably set so that yaml.dump() below prints the
        # shared expected_resources object in full for every pod instead of
        # emitting YAML anchors/aliases. This changes yaml.Dumper globally.
        yaml.Dumper.ignore_aliases = lambda *args: True
        # Expected values come from the data already loaded by the test, so
        # the file is not parsed again on every poll
        expected_resources = resources.get(resource_name)
        replicas_after_num = 0
        failed_pods = dict()
        pods = [p.name for p in k8sclient.pods.list(namespace=rook_ns, name_prefix=pod_prefix)
                if key_word not in p.name]
        for pod in pods:
            if cluster.check.check_pod_container_expected_resources(
                    pod_name=pod, pod_container_name=resource_name, pod_ns=rook_ns,
                    expected_resources=expected_resources):
                replicas_after_num += 1
                continue
            container_resources = k8sclient.pods.get(
                name=pod, namespace=rook_ns).containers_resources
            current_container_resources = [
                r.get('resources', {}) for r in
                container_resources['containers'] if
                r.get('name', '') == resource_name][0]
            failed_pods[pod] = {
                'expected_resources': expected_resources,
                'current_resources': current_container_resources}
        # Both checks run after the loop so that every failing pod is
        # reported and the replica count comparison is actually reached
        if failed_pods:
            LOG.error("Next pods resources are not as expected: "
                      f"\n{yaml.dump(failed_pods)}")
            return False
        if replicas_after_num != replicas_before_num:
            LOG.error(f"Number of {resource_name} replicas before is {replicas_before_num}, but after "
                      f"patching procedure we had {replicas_after_num} number of replicas")
            return False
        return True

    def check_ceph_pods_resources():
        """Return True when mon, osd and mgr pods all pass the check.

        Evaluation stops at the first kind of pods that fails.
        """
        return (
            collect_and_check_ceph_pods(pod_prefix='rook-ceph-mon', resource_name='mon',
                                        replicas_before_num=mon_replicas_before_num, key_word='canary')
            and collect_and_check_ceph_pods(pod_prefix='rook-ceph-osd', resource_name='osd',
                                            replicas_before_num=osd_replicas_before_num, key_word='prepare')
            and collect_and_check_ceph_pods(pod_prefix='rook-ceph-mgr', resource_name='mgr',
                                            replicas_before_num=mgr_replicas_before_num))

    # Waiting for replicas count for old replicas is 0.
    # This means that new replicas for pods are created and
    # old replicas are scaled to 0
    LOG.info("Waiting for resources are applied for containers")
    waiters.wait(
        lambda: cluster.check.check_actual_expected_replicas_count(replicas_list=replicas_list,
                                                                   expected_replicas_count=0),
        timeout=1200, interval=120)

    show_step(4)
    # Wait until the deployments collected before the patch are reported ready
    LOG.info("Check that deployments in status ready")
    waiters.wait(
        lambda: cluster.check.check_deployments_ready(deployments_list=deployments_list),
        timeout=1200, interval=120)

    LOG.info("Start checking new resources in respawned pods")
    waiters.wait(check_ceph_pods_resources, timeout=1200, interval=120)
    cluster.check.check_k8s_pods(target_namespaces=rook_ns)

    # Final check ceph health status
    # The assert is used as a trigger only: when health is not OK yet, fall
    # back to waiting for it instead of failing immediately
    try:
        health_info = cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        cluster.check.wait_ceph_health_status(timeout=1200)
    show_step(5)
    cluster.check.check_ceph_pvc()
