import json
import re

import pytest

from si_tests import logger
from si_tests import settings
from si_tests.clients.k8s.pods import K8sPod
from si_tests.managers.kaas_manager import Cluster
from si_tests.utils import utils, waiters
from si_tests.utils.ha_helpers import collect_ceph_role_machines

# Logger shared by every helper and test in this module.
LOG = logger.logger
# Collected once, at import time; the entries are filtered by their `role`
# attribute below to build the pytest parametrization lists.
CEPH_MACHINES = collect_ceph_role_machines()

# Skip reason used by the tests in this module once any earlier test in the session has failed.
SKIP_MSG = "Damage multiple Ceph components may make recovery impossible. Please investigate previous failure."


def id_label_node(param):
    """Build a readable pytest id for a parametrized Ceph target.

    Args:
        param: parametrization value; expected to expose `role` and
            `machine.name` when it describes a Ceph target

    Returns: 'MODULE=<role>;MACHINE=<machine name>' for objects that have a
        `role` attribute, an empty string for anything else

    """
    # Guard clause: values without a role get no custom label.
    if not hasattr(param, 'role'):
        return ""
    return f'MODULE={param.role};MACHINE={param.machine.name}'


def collect_modules():
    """Return the CEPH_MACHINES entries whose role mentions neither 'csi' nor 'mon'."""
    skipped_markers = ('csi', 'mon')
    return [
        entry for entry in CEPH_MACHINES
        if not any(marker in entry.role for marker in skipped_markers)
    ]


def select_mon():
    """Return the CEPH_MACHINES entries whose role contains 'mon'."""
    monitors = []
    for entry in CEPH_MACHINES:
        if 'mon' in entry.role:
            monitors.append(entry)
    return monitors


def get_mgr_data(cluster: Cluster):
    """Extract the active and standby mgr names from the Ceph cluster status.

    Reads status.fullClusterInfo.daemonsStatus.mgr.status of the object
    returned by cluster.get_cephcluster(); every missing level is treated
    as empty.

    Args:
        cluster: cluster object

    Returns: tuple (active_mgr, standbys_mgr) where active_mgr is the leading
        run of lowercase letters of the status string (None when there is
        none) and standbys_mgr is the list of texts found inside square
        brackets

    """
    section = cluster.get_cephcluster().data
    # Walk down the nested status dict, tolerating absent levels.
    for key in ('status', 'fullClusterInfo', 'daemonsStatus', 'mgr'):
        section = section.get(key, {})
    mgr_status = section.get('status', '')

    leading_name = re.match(r"^[a-z]+", mgr_status)
    active_mgr = leading_name.group(0) if leading_name else None
    return active_mgr, re.findall(r"\[(.*?)\]", mgr_status)


# Statuses the tests expect while a Ceph daemon of the given role is paused.
# Per role:
#   "ceph"            - expected Ceph health status
#   "miracephhealth"  - expected state passed to wait_miracephhealth_state()
#   "kaascephcluster" - expected state passed to wait_kaascephcluster_state()
#   "cluster"         - expected failing cluster condition and its message
ERRORS = {
    "mgr": {
        "ceph": "HEALTH_OK",
        "miracephhealth": "Failed",
        "kaascephcluster": "Failed",
        "cluster": {"Ceph": "Not all mgrs are running"},
    },
    "mon": {
        "ceph": "HEALTH_WARN",
        "miracephhealth": "Failed",
        "kaascephcluster": "Failed",
        "cluster": {"Ceph": "Not all mons are running"},
    },
    "osd": {
        "ceph": "HEALTH_WARN",
        "miracephhealth": "Failed",
        "kaascephcluster": "Failed",
        "cluster": {"Ceph": "Not all osds are up"},
    },
}


def patch_ceph_configmap(initial_cm, mon_letter, cluster):
    """
    Workaround for PRODX-32724
    Described in https://mirantis.jira.com/browse/PRODX-32724?focusedCommentId=992624

    Removes the leftovers of a previous monitor: the mon is deleted from the
    Ceph monmap through the ceph tools pod and the 'rook-ceph-mon-endpoints'
    ConfigMap is patched so that it no longer lists it. Nothing is changed
    when the current ConfigMap mapping already has no entry for the monitor.

    Args:
        initial_cm: initial configmap (dict with a 'data' section)
        mon_letter: mon pod letter to delete
        cluster: cluster object

    Returns: None

    Raises:
        Exception: the current ConfigMap still maps the monitor but the
            initial ConfigMap has no mapping for it

    """
    snapshot = initial_cm.get('data', {})
    LOG.info(f"Initial ConfigMap data: {snapshot}")
    snapshot_nodes = json.loads(snapshot.get('mapping', '')).get('node', {})
    mon_to_remove = snapshot_nodes.get(mon_letter)

    cm = cluster.k8sclient.configmaps.get(namespace='rook-ceph', name='rook-ceph-mon-endpoints')
    cm_data = cm.data.get('data', {})
    live_nodes = json.loads(cm_data.get('mapping', '')).get('node', {})
    if not live_nodes.get(mon_letter):
        LOG.info("Current ConfigMap updated by rook correctly. No need to apply a workaround")
        return

    if not mon_to_remove:
        raise Exception(f"Initial ConfigMap does not contains mapping for monitor '{mon_letter}'")

    mon_ip = mon_to_remove.get('Address')

    LOG.info(f"Remove failed monitor '{mon_letter}' entry from ceph cluster monmap")
    cluster.get_ceph_tool_pod().exec(['/bin/sh', '-c', f"ceph mon rm {mon_letter}"])

    LOG.info("Patch ConfigMap")
    LOG.info(f"Current ConfigMap data: {cm_data}")

    # csi-cluster-config-json: only duplicates are dropped from the first
    # entry's monitor list (set() does not keep the original order).
    csi_clusters = json.loads(cm_data.get('csi-cluster-config-json'))
    first_cluster = csi_clusters[0]
    first_cluster['monitors'] = list(set(first_cluster['monitors']))
    csi_json = json.dumps(csi_clusters)

    # data: comma-separated endpoints; drop the one that starts with "<letter>=<address>"
    stale_prefix = f"{mon_letter}={mon_ip}"
    kept_endpoints = []
    for endpoint in cm_data.get('data').split(','):
        if not endpoint.startswith(stale_prefix):
            kept_endpoints.append(endpoint)
    endpoints_csv = ",".join(kept_endpoints)

    # mapping: forget the node entry of the removed monitor
    mapping_doc = json.loads(cm_data.get('mapping'))
    del mapping_doc.get('node', {})[mon_letter]
    mapping_json = json.dumps(mapping_doc)

    # maxMonId: 1-based position of the lexicographically greatest remaining endpoint.
    # NOTE(review): presumably meant to track the highest mon index - confirm against rook.
    max_mon_id = str(kept_endpoints.index(max(kept_endpoints)) + 1)

    # Update the fetched dict in place and send it back as the patch body.
    cm_data.update({
        'csi-cluster-config-json': csi_json,
        'data': endpoints_csv,
        'mapping': mapping_json,
        'maxMonId': max_mon_id,
    })
    cm.patch({'data': cm_data})


@pytest.mark.parametrize("target_data", collect_modules(), ids=id_label_node)
@pytest.mark.usefixtures('log_step_time')
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_ceph_component_down(request, target_cluster, target_data, kaas_manager, show_step):
    """Emulate Ceph components failure/freezing and check that Ceph is available

    Scenario:
        1. Emulate Ceph component failure/freezing
        2. Check Ceph cluster status
        3. Spawn test pod and check PV i/o
        4. Delete pod
        5. Check Ceph cluster and MCC cluster statuses
        6. Check PV i/o
        7. Delete test pod and claim
        8. Check cluster readiness

    """
    # NOTE(review): show_step(N) presumably takes its text from the numbered
    # Scenario list above - confirm before rewording the docstring.

    # A previous failure may have left Ceph damaged; do not break one more component.
    if request.session.testsfailed:
        LOG.error(SKIP_MSG)
        pytest.skip(SKIP_MSG)

    # Random suffix keeps the namespace/claim names unique per run.
    rnd = utils.gen_random_string(6)
    namespace = f'test-ns-{rnd}'
    claim_name = f"test-pv-claim-{rnd}"
    mount_path = '/data/test'

    machine = target_data.machine
    container_id = target_data.container_id
    pod = target_data.pod
    node = target_data.k8s_node_name
    role = target_data.role

    LOG.info(f'Create "{namespace}" namespace')
    test_ns = target_cluster.k8sclient.namespaces.create(name=namespace, body={'metadata': {'name': namespace}})

    show_step(1)
    LOG.info("Pause container {} for pod {} on {} machine ({})".format(container_id, pod.name, machine.name, node))

    # Freeze the component by pausing its container task on the machine.
    machine._run_cmd("sudo ctr --namespace k8s.io tasks pause {}".format(container_id),
                     ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE,
                     verbose_info=True, timeout=10)

    show_step(2)
    # Statuses expected while a component of this role is frozen.
    error = ERRORS[role]
    if target_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCephHealth status")
        target_cluster.check.wait_miracephhealth_state(expected_state=error['miracephhealth'])
    else:
        LOG.info("Check KaaS Ceph cluster status")
        target_cluster.check.wait_kaascephcluster_state(expected_state=error['kaascephcluster'])
    LOG.info("Check Ceph health status")
    # Fast path: accept the status if it already matches, otherwise fall back to waiting for it.
    try:
        health_info = target_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == error['ceph'], f'Health is not OK. Will not proceed. ' \
                                                       f'Current ceph health status: {health_info}'
    except AssertionError:
        target_cluster.check.wait_ceph_health_status(error['ceph'])
    LOG.info("Check MCC cluster status")
    assert target_cluster.is_ready(exp_provider_status=False,
                                   expected_fails=['Ceph']), "MCC cluster status not as expected"
    assert target_cluster.are_conditions_ready(
        expected_fails=error['cluster']), "MCC cluster error message not as expected"

    show_step(3)
    storage_classes = target_cluster.k8sclient.api_storage.list_storage_class().to_dict()['items']
    default_annotation = 'storageclass.kubernetes.io/is-default-class'
    # Annotations may be missing/None or lack the is-default key, so look the
    # flag up defensively instead of indexing into it.
    default_rbd_classes = [
        storageclass['metadata']['name'] for storageclass in storage_classes
        if storageclass['provisioner'] == 'rook-ceph.rbd.csi.ceph.com'
        and (storageclass['metadata'].get('annotations') or {}).get(default_annotation) == 'true'
    ]
    assert default_rbd_classes, "Default rook-ceph RBD storage class not found"
    storage_class_name = default_rbd_classes[0]
    test_pvc = target_cluster.check.create_pvc(ns=namespace, pvc_name=claim_name, storage_class=storage_class_name)
    test_pod = target_cluster.check.create_pod(pvc_name=test_pvc.name, ns=namespace, mount_path=mount_path)
    target_cluster.check.check_pod_filesystem(mount_path=mount_path, pod=test_pod)

    show_step(4)
    # Delete the pod of the paused component.
    pod.delete(async_del=True)

    show_step(5)
    LOG.info("Check Ceph health status")
    # Same fast path as above, this time expecting a healthy cluster.
    try:
        health_info = target_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        target_cluster.check.wait_ceph_health_status(timeout=1800)
    if target_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCephHealth status")
        target_cluster.check.wait_miracephhealth_state()
    else:
        LOG.info("Check KaaS Ceph cluster status")
        target_cluster.check.wait_kaascephcluster_state()

    show_step(6)
    target_cluster.check.check_pod_filesystem(mount_path=mount_path, pod=test_pod)

    show_step(7)
    test_pod.delete(async_del=True)
    test_pvc.delete()
    test_ns.delete()

    show_step(8)
    target_cluster.check.check_cluster_readiness()


@pytest.mark.parametrize("target_data", select_mon(), ids=id_label_node)
@pytest.mark.usefixtures('log_step_time')
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_recreate_mon(request, target_cluster, target_data, kaas_manager, show_step):
    """Emulate Ceph MON failure/freezing and check that Ceph is available

    Scenario:
        1. Check Ceph cluster
        2. Emulate Ceph MON pod failure/freezing
        3. Check Ceph cluster status
        4. Spawn test pod and check PV i/o
        5. Wait until Ceph mon pod recreated
        6. Check Ceph cluster
        7. Check PV i/o
        8. Delete test pod and claim
        9. Check cluster readiness

    """
    # NOTE(review): show_step(N) presumably takes its text from the numbered
    # Scenario list above - confirm before rewording the docstring.

    # A previous failure may have left Ceph damaged; do not break one more component.
    if request.session.testsfailed:
        LOG.error(SKIP_MSG)
        pytest.skip(SKIP_MSG)

    # Random suffix keeps the namespace/claim names unique per run.
    rnd = utils.gen_random_string(6)
    namespace = f'test-ns-{rnd}'
    claim_name = f"test-pv-claim-{rnd}"
    mount_path = '/data/test'
    role = target_data.role

    machine = target_data.machine
    container_id = target_data.container_id
    pod: K8sPod = target_data.pod
    node = target_data.k8s_node_name
    pod_name_prefix = f"rook-ceph-{role}"

    # Snapshot of the mon endpoints ConfigMap taken before the failure; the
    # PRODX-32724 workaround below needs the original mapping of the monitor.
    initial_cm = target_cluster.k8sclient.configmaps.get(namespace='rook-ceph', name='rook-ceph-mon-endpoints').data

    LOG.info(f'Create "{namespace}" namespace')
    test_ns = target_cluster.k8sclient.namespaces.create(name=namespace, body={'metadata': {'name': namespace}})
    # Remember how many pods with this prefix exist to detect the re-created one later.
    pods_start = target_cluster.k8sclient.pods.list(namespace=settings.ROOK_CEPH_NS, name_prefix=pod_name_prefix)
    pods_amount = len(pods_start)

    show_step(1)
    LOG.info("Check Ceph health status")
    # Fast path: accept the status if it already matches, otherwise fall back to waiting for it.
    try:
        health_info = target_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        target_cluster.check.wait_ceph_health_status()
    if target_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCephHealth status")
        target_cluster.check.wait_miracephhealth_state()
    else:
        LOG.info("Check KaaS Ceph cluster status")
        target_cluster.check.wait_kaascephcluster_state()

    show_step(2)
    LOG.info("Pause container {} for pod {} on {} machine ({})".format(container_id, pod.name, machine.name, node))
    # Freeze the monitor by pausing its container task on the machine.
    machine._run_cmd("sudo ctr --namespace k8s.io tasks pause {}".format(container_id),
                     ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE,
                     verbose_info=True, timeout=10)

    show_step(3)
    # Statuses expected while a component of this role is frozen.
    error = ERRORS[role]
    # Log only the check that is actually performed (the former unconditional
    # "Check KaaS Ceph cluster status" line was misleading/duplicated).
    if target_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCephHealth status")
        target_cluster.check.wait_miracephhealth_state(expected_state=error['miracephhealth'])
    else:
        LOG.info("Check KaaS Ceph cluster status")
        target_cluster.check.wait_kaascephcluster_state(expected_state=error['kaascephcluster'])
    LOG.info("Check Ceph cluster status")
    try:
        health_info = target_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == error['ceph'], f'Health is not OK. Will not proceed. ' \
                                                       f'Current ceph health status: {health_info}'
    except AssertionError:
        target_cluster.check.wait_ceph_health_status(error['ceph'])

    show_step(4)
    storage_classes = target_cluster.k8sclient.api_storage.list_storage_class().to_dict()['items']
    default_annotation = 'storageclass.kubernetes.io/is-default-class'
    # Annotations may be missing/None or lack the is-default key, so look the
    # flag up defensively instead of indexing into it.
    default_rbd_classes = [
        storageclass['metadata']['name'] for storageclass in storage_classes
        if storageclass['provisioner'] == 'rook-ceph.rbd.csi.ceph.com'
        and (storageclass['metadata'].get('annotations') or {}).get(default_annotation) == 'true'
    ]
    assert default_rbd_classes, "Default rook-ceph RBD storage class not found"
    storage_class_name = default_rbd_classes[0]
    test_pvc = target_cluster.check.create_pvc(ns=namespace, pvc_name=claim_name, storage_class=storage_class_name)
    test_pod = target_cluster.check.create_pod(pvc_name=test_pvc.name, ns=namespace, mount_path=mount_path)
    target_cluster.check.check_pod_filesystem(mount_path=mount_path, pod=test_pod)

    show_step(5)
    LOG.info(f"Wait ~10 min for the pod '{pod.name}' to be removed")
    waiters.wait(lambda: not pod.exists(), timeout=900, interval=30, timeout_msg=f"Pod {pod.name} exist")

    # TODO workaround for https://mirantis.jira.com/browse/PRODX-32724
    # The mon letter is the word that follows "rook-ceph-mon-" in the pod name.
    mon_letter = re.search(r"rook-ceph-mon-(\w+)", pod.name).group(1)
    patch_ceph_configmap(initial_cm, mon_letter, target_cluster)

    # < --- end of the PRODX-32724 workaround

    LOG.info("Wait for the new pod to be created")
    # First wait until the pod count is back to the initial amount, then until the pods are healthy.
    waiters.wait(lambda: len(
        target_cluster.k8sclient.pods.list(namespace=settings.ROOK_CEPH_NS,
                                           name_prefix=pod_name_prefix)) == pods_amount,
                 timeout=900, interval=30, timeout_msg="The number of pods does not match")
    waiters.wait(lambda: target_cluster.k8sclient.pods.check_pods_statuses(target_namespaces=settings.ROOK_CEPH_NS,
                                                                           pods_prefix=pod_name_prefix), timeout=240,
                 interval=10, timeout_msg="Some pods are not in correct status/phase")

    show_step(6)
    LOG.info("Check Ceph health status")
    try:
        health_info = target_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        target_cluster.check.wait_ceph_health_status(timeout=3600)
    if target_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCephHealth status")
        target_cluster.check.wait_miracephhealth_state()
    else:
        LOG.info("Check KaaS Ceph cluster status")
        target_cluster.check.wait_kaascephcluster_state()

    show_step(7)
    target_cluster.check.check_pod_filesystem(mount_path=mount_path, pod=test_pod)

    show_step(8)
    test_pod.delete(async_del=True)
    test_pvc.delete()
    test_ns.delete()

    show_step(9)
    target_cluster.check.check_cluster_readiness()


@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_ha_ceph_mgr(request, target_cluster, cluster_condition_check, _):
    """Check that Ceph HA moves the active mgr to the second node

    Scenario:
        1. Check ceph cluster state and readiness
        2. Check on which node we have active ceph mgr and on which it in stand by
        3. Reboot node with active ceph mgr
        4. Check that all pods are Running and Ready
        5. Check ceph cluster state and readiness
        6. Check that ceph mgrs swapped

    Expected result - the active Ceph mgr migrates to the node on which it
    was in standby state. The whole scenario is delegated to
    target_cluster.check.check_ha_ceph_mgr().
    """
    # Do not touch Ceph again once an earlier test of the session has failed.
    earlier_failures = request.session.testsfailed
    if earlier_failures:
        LOG.error(SKIP_MSG)
        pytest.skip(SKIP_MSG)

    target_cluster.check.check_ha_ceph_mgr()
