import pytest
import os
import re
import yaml

import kubernetes

from si_tests.deployments import openstack_deploy
from si_tests.managers import openstack_manager
from si_tests import settings
from si_tests import logger
from si_tests.utils import update_release_names

LOG = logger.logger


def dump_nodes_info(nwll):
    """Summarise NodeWorkLoadLock objects into a flat lookup dictionary.

    :param nwll: iterable of NodeWorkLoadLock dicts; each one is read at
        ``spec.nodeName``, ``metadata.name``, ``status.state`` and
        ``metadata.resource_version``
    :return: dict keyed by the string ``"<nodeName>/<nwl name>"`` with the
        lock name, lock state and resource version as the value
    """
    summary = {}
    for lock in nwll:
        host = lock["spec"]["nodeName"]
        lock_name = lock["metadata"]["name"]
        # The node name alone is not unique enough: a node may own several
        # NWLs with different names, so both parts go into the key.
        entry = {
            "workloadlock_name": lock_name,
            "workloadlock_state": lock["status"]["state"],
            "resource_version": lock["metadata"]["resource_version"],
        }
        summary[f"{host}/{lock_name}"] = entry
    return summary


# Materialise the release names into a list once, so the `update_release_name` fixture
# can reuse the same sequence for both its `params` and its `ids`.
# NOTE: this rebinds the imported `update_release_names` module name to the resulting list.
update_release_names = list(update_release_names.generate_update_release_names())
# Module-level flag shared by all parametrized runs: the `update_release_name` fixture sets it
# when a step did not pass and checks it to skip the following steps.
is_update_test_failed = False


@pytest.fixture(scope='function', params=update_release_names,
                ids=[f"RELEASE={x}" for x in update_release_names])
def update_release_name(request):
    """Provide the target clusterrelease name for one update step.

    The fixture is parametrized with every generated release name. Once a
    step has not passed, the module-level ``is_update_test_failed`` flag is
    raised and each following step is skipped instead of being executed.
    """
    global is_update_test_failed

    # Setup: refuse to continue the chain after a failed step
    if is_update_test_failed:
        skip_reason = (f"Skip updating clusterrelease to {request.param} because "
                       f"previous update step failed")
        LOG.info(skip_reason)
        pytest.skip(skip_reason)

    yield request.param

    # Teardown: a missing 'rep_call' report or a non-passed call phase both
    # count as a failed step
    if not hasattr(request.node, 'rep_call') or not request.node.rep_call.passed:
        is_update_test_failed = True

@pytest.mark.parametrize("_", ["CLUSTER_NAME={0}"
                               .format(settings.TARGET_CLUSTER)])
@pytest.mark.usefixtures("store_updated_child_cluster_description")
@pytest.mark.usefixtures("introspect_child_target_objects")
@pytest.mark.usefixtures('mcc_per_node_workload_check_after_test')
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('collect_machines_timestamps')
@pytest.mark.usefixtures('introspect_child_lcm_operation_stuck')
@pytest.mark.usefixtures('introspect_machines_stages')
@pytest.mark.usefixtures('log_start_end_timestamps')
@pytest.mark.usefixtures("check_ceph_keyrings")
@pytest.mark.usefixtures("runtime_restart_checker")
def test_update_child_clusterrelease(kaas_manager, update_release_name, _):
    """Update child cluster release

    Scenario (currently short-circuited by the unconditional skip below):
        1. Skip if the requested release equals the current clusterrelease
        2. Collect pre-update state: machine uptimes, fixed resources,
           NodeWorkLoadLocks and Ceph version (the last two only with Ceph)
        3. Trigger the update and wait until it is finished
        4. Verify NodeWorkLoadLocks, nodes, pods, helmbundles, Ceph PVC,
           k8s component versions, machine reboots and k8s resources
        5. For MOSK releases additionally verify the OpenStack deployment
           and, if TF is enabled, the TF state
        6. Check cordon/drain stages for patchrelease or skip-maintenance updates

    :param kaas_manager: manager fixture used to reach the namespace, the
        cluster and the clusterrelease objects
    :param update_release_name: target clusterrelease name (parametrized fixture)
    :param _: placeholder carrying the cluster name into the test id
    """

    # NOTE(review): this skip is unconditional, so nothing below it is executed at the moment.
    # The message mentions a feature flag, but no flag is checked here - confirm whether the
    # skip is supposed to be gated by a setting.
    pytest.skip("Direct child cluster update is disabled by feature flag, use update plan instead")

    cluster_name = settings.TARGET_CLUSTER
    namespace_name = settings.TARGET_NAMESPACE

    ns = kaas_manager.get_namespace(namespace_name)
    child_cluster = ns.get_cluster(cluster_name)
    cr_before = child_cluster.clusterrelease_version

    if update_release_name == cr_before:
        msg = (f"Requested {update_release_name} is the same as current "
               f"clusterrelease version {cr_before}, skipping update")
        LOG.info(msg)
        pytest.skip(msg)

    is_patchrelease = child_cluster.is_patchrelease_upgrade(clusterrelease_version_before=cr_before,
                                                            clusterrelease_version_after=update_release_name)

    # MOSK is detected by the clusterrelease name read before the update is triggered
    is_mosk_release = bool(re.search(r"mos.?-.*", child_cluster.clusterrelease_version))
    if is_mosk_release:
        # OpenStackManager is built from a kubeconfig file, so dump the one
        # stored in the cluster secret into the artifacts directory first
        child_kubeconfig_name, child_kubeconfig = \
            child_cluster.get_kubeconfig_from_secret()
        child_kubeconfig_path = os.path.join(
            settings.ARTIFACTS_DIR, child_kubeconfig_name)
        LOG.info("Save cluster kubeconfig to %s", child_kubeconfig_path)
        with open(child_kubeconfig_path, 'w') as f:
            f.write(child_kubeconfig)
        os_manager = openstack_manager.OpenStackManager(
            kubeconfig=child_kubeconfig_path)

    reboot_required = child_cluster.update_requires_reboot(
        cr_before=cr_before, target_clusterrelease=update_release_name)

    if reboot_required:
        LOG.info("Restart of nodes will be required during update")

    uptimes_before = child_cluster.get_machines_uptime()

    LOG.info("Collect child cluster fixed resources before update")
    lcm_type = child_cluster.data['status'].get(
        'providerStatus', {}).get('releaseRefs', {}).get(
        'current', {}).get('lcmType', '')
    LOG.info("LCM Type is {0}".format(lcm_type))
    cluster_resources_before = child_cluster.describe_fixed_resources()
    LOG.debug("Child cluster resources before update:\n{0}"
              .format(yaml.dump(cluster_resources_before)))
    ucp_tag_in_cr_before = {
        x['params']['ucp_tag'] for x in kaas_manager.get_clusterrelease(
            cr_before).data['spec']['machineTypes']['control']
        if 'ucp_tag' in x['params']}

    if child_cluster.is_ceph_deployed:
        # Read the Ceph version before the update is triggered, so that the
        # "before" value cannot race with an already running update
        ceph_version_before = child_cluster.get_miraceph_version()

        # Dump all WorkLoadLocks states before update procedure
        # Save list with NodeWorkLoadLocks dicts
        nwll = [nwl.data for nwl in child_cluster.get_nodeworkloadlocks()]
        assert nwll, f"NodeWorkLoadLocks list is empty in child cluster: {child_cluster.name}"
        nodes_data_map_before = dump_nodes_info(nwll)
        LOG.info(f"Current checks status before update: \n{yaml.dump(nodes_data_map_before)}")

    LOG.info("Cluster release before update {0}".format(cr_before))
    LOG.info('ucp_tag before update {0}'.format(ucp_tag_in_cr_before))
    LOG.info(("=" * 80) + "\n\nUpdating child cluster {0} "
             "to the clusterrelease {1}\n\n"
             .format(cluster_name, update_release_name))

    ##########################
    #  Start cluster update  #
    ##########################
    child_cluster.update_cluster(update_release_name)

    if child_cluster.is_ceph_deployed:
        if is_patchrelease:
            LOG.warning("SKIP checking ceph ClusterWorkLoadLock for a patchrelease update")
        else:
            # Ceph cwl name is hardcoded and can't be changed
            ceph_cwl_name = settings.CEPH_CWL_NAME
            LOG.info("Check ceph ClusterWorkLoadLock")
            # State inactive means that ceph cluster is started update
            # Now we need to wait for state active which means ceph
            # cluster is finished update or reconcile to check
            # that update is not required.
            # Only the transition to 'inactive' is awaited at this point.
            child_cluster.check.wait_clusterworkloadlock_state(
                name=ceph_cwl_name, state='inactive')
            LOG.info("Ceph cwl became inactive. Continue")

    child_cluster.check.check_cluster_release(update_release_name)
    child_cluster.check.check_update_finished(timeout=settings.KAAS_CHILD_CLUSTER_UPDATE_TIMEOUT, interval=120)
    #################################
    #  Cluster update is completed  #
    #################################

    # Set reboot_required flag for non-BM machines
    reboot_expected = {machine.name: reboot_required for machine in child_cluster.get_machines()}
    # The same but for Node names
    reboot_expected_nodes = {machine.get_k8s_node_name(): reboot_expected.get(machine.name)
                             for machine in child_cluster.get_machines()}

    # Check that all workloadlock objects were updated
    if child_cluster.is_ceph_deployed:
        # Refresh nodeworkloadlock dict
        nwll = [nwl.data for nwl in child_cluster.get_nodeworkloadlocks()]
        assert nwll, f"NodeWorkLoadLocks list is empty in child cluster: {child_cluster.name}"
        nodes_data_map_after = dump_nodes_info(nwll)
        LOG.info(f"Current checks status after update: \n{yaml.dump(nodes_data_map_after)}")
        for node in nwll:
            node_name = node["spec"]["nodeName"]
            nwl_name = node["metadata"]["name"]
            # Same "<node>/<nwl>" key format as produced by dump_nodes_info()
            nwl_id = f"{node_name}/{nwl_name}"
            if not (reboot_expected_nodes[node_name]):
                LOG.debug(f"Skip checking NWL '{nwl_id}' because the node '{node_name}' was not expected to reboot")
                continue

            # NOTE(vsaienko): The nodes_data_map_before is collected before triggering update, we have a case when
            # controller with LCM functionality added with update, in this case the data will not be present before
            # triggering update. Check only that resource version is changed for objects that were present before
            # update. Also check resource version change for objects that were in active state originally as in
            # other case the controller just ignore LCM actions for node.
            if nwl_id in nodes_data_map_before:
                # Check that resourceVersion is updated
                if nodes_data_map_before[nwl_id]["workloadlock_state"] == "active":
                    assert nodes_data_map_before[nwl_id]["resource_version"] != \
                        nodes_data_map_after[nwl_id]["resource_version"], \
                        f"Resource version for nodeworkloadlock '{nwl_id}' was not updated, but expected. " \
                        f"It means that there are no changes were made for NodeWorkLoadLock object and this is FAIL," \
                        f" because node has Reboot Required flag in true"

    # Check Ceph was updated or not (informational only, nothing is asserted)
    if child_cluster.is_ceph_deployed:
        ceph_version_after = child_cluster.get_miraceph_version()

        if ceph_version_before != ceph_version_after:
            LOG.info(f"Ceph cluster was updated from {ceph_version_before} "
                     f"to {ceph_version_after}")
        else:
            LOG.info(f"Ceph was not updated. Current "
                     f"version: {ceph_version_before}")
    # Nodes will not have updating status if there is no changes
    # in ucp_tag or CR versions
    child_cluster.check.check_cluster_nodes()
    child_cluster.check.check_k8s_pods()
    if is_mosk_release:
        LOG.info("Child cluster name contains MOS pattern. "
                 "coredns ConfigMap is going to be adjusted.")
        timeouts = settings.OPENSTACK_DEPLOY_TIMEOUT
        try:
            os_manager.get_openstackdeployment(
                name=settings.OSH_DEPLOYMENT_NAME, read=True)
        except kubernetes.client.rest.ApiException as e:
            if e.status != 404:
                # Only "not found" means that no MOS apps are deployed; any other
                # API error is a real failure and must not be silently ignored
                raise
            LOG.info("There is no MOS apps deployed.")
        else:
            LOG.info("Configuring coredns in kube-system to work "
                     "with it.just.works domain")
            openstack_deploy.configure_dns_kubesys(
                os_manager,
                timeouts.get("3", 180)
            )

    if is_patchrelease:
        # regarding https://mirantis.jira.com/browse/PRODX-31773
        readiness_timeout = settings.CHECK_CLUSTER_READINESS_TIMEOUT + 1800
        LOG.info(f"Use extra timeout {readiness_timeout} sec. for patch release cluster readiness")
    else:
        readiness_timeout = settings.CHECK_CLUSTER_READINESS_TIMEOUT

    child_cluster.check.check_cluster_readiness(timeout=readiness_timeout)
    child_cluster.check.check_helmbundles()
    child_cluster.check.check_k8s_nodes()
    child_cluster.check.check_upgrade_stage_success()
    # Check/wait for correct docker service replicas in cluster
    # We have renamed agent if ucp changes version
    ucp_worker_agent_name = child_cluster.check.get_ucp_worker_agent_name()
    child_cluster.check.check_actual_expected_docker_services(
        changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
    child_cluster.check.check_k8s_pods()
    child_cluster.check.check_actual_expected_pods()
    child_cluster.check.check_no_leftovers_after_upgrade()

    if child_cluster.is_ceph_deployed:
        miracephfs_enabled = child_cluster.check.miracephfs_enabled()
        miraceph_sc_name = ''
        miraceph_device_class = None

        # Check if miracephfs is enabled
        if miracephfs_enabled:
            for data_pool_entry in miracephfs_enabled:
                data_pools = data_pool_entry.get('dataPools', [])
                miraceph_sc_name = data_pool_entry.get('name', '')

                # Iterate over each pool to find the first one with a device class
                for pool in data_pools:
                    miraceph_device_class = pool.get('deviceClass', None)
                    if miraceph_device_class:
                        break
                # Stop at the first entry that yielded a device class, so the
                # storage class name and the device class come from the same entry
                if miraceph_device_class:
                    break
        else:
            # Fallback to default pool settings if miracephfs is not enabled
            miraceph_default_pool = child_cluster.check.get_miraceph_default_pool()
            miraceph_sc_name = miraceph_default_pool.get('name', '')
            miraceph_device_class = miraceph_default_pool.get('deviceClass', None)

        child_cluster.check.check_ceph_pvc(
            cephfs_enabled=miracephfs_enabled,
            sc_name=miraceph_sc_name,
            device_class=miraceph_device_class)

    # check k8s version: kubelet and kube-proxy must report the same version on every node
    failed = {}
    for node in child_cluster.k8sclient.nodes.list_all():
        node_info = node.read().status.node_info
        if node_info.kubelet_version != node_info.kube_proxy_version:
            failed[node.name] = "kubelet_version {0} doesn't match " \
                                "kube_proxy_version {1} version".format(
                node_info.kubelet_version,
                node_info.kube_proxy_version)

    assert failed == {}, "k8s versions mismatch " \
                         "Details: {}".format(failed)
    uptimes_after = child_cluster.get_machines_uptime(dump_reboot_list=True)
    child_cluster.check.check_machines_reboot(uptimes_before, uptimes_after, reboot_expected)

    child_cluster.store_k8s_artifacts()
    child_cluster.provider_resources.save_artifact()

    # Check for changed/missing resources
    child_cluster.check.check_k8s_resources_after_update(
        cluster_resources_before,
        reboot_expected_nodes=reboot_expected_nodes)
    LOG.info("\n*** Child cluster <{0}> "
             "have been upgraded to the clusterrelease {1}"
             .format(cluster_name, update_release_name))

    if is_mosk_release:
        os_controller_version = os_manager.os_controller_version()
        LOG.info(f"OpenStack controller version: {os_controller_version}")

        os_manager.wait_os_deployment_status(timeout=settings.OPENSTACK_LCM_OPERATIONS_TIMEOUT, status="APPLIED")
        os_manager.wait_osdpl_services()

        LOG.info("Wait osdpl health status=Ready")
        os_manager.wait_openstackdeployment_health_status(
            timeout=1800)
        LOG.info("Wait os jobs to success and pods to become Ready")
        os_manager.wait_os_resources(timeout=1800)

        if not reboot_required:
            LOG.info("Will check for reboot required status is True for machines due to kernel changed after upgrade")
            machines_statuses = child_cluster.get_machines_reboot_required_status()
            is_reboot_required_msg = ""
            # NOTE(review): 'reboot_expected' maps every machine to 'reboot_required', which is
            # falsy in this branch, so the first check below cannot trigger here - TODO confirm
            # the intended condition.
            for machine_name, machine_reboot_required_status in machines_statuses.items():
                # Reboot is expected if a distribution was changed during upgrade,
                # or if reboot_required flag was set for the cluster version
                # in the kaasrelease 'supportedClusterReleases'
                expect_machine_reboot = reboot_expected.get(machine_name)
                if expect_machine_reboot and machine_reboot_required_status:
                    is_reboot_required_msg += (
                        f"Machine '{machine_name}' reboot should be executed during upgrade, but flag "
                        f"'Machine.status.providerStatus.reboot.required' is still 'True'\n")
                if not expect_machine_reboot and not machine_reboot_required_status:
                    is_reboot_required_msg += (
                        f"Machine '{machine_name}' have a changed kernel version but disabled rebootRequired flag. "
                        f"'Machine.status.providerStatus.reboot.required' is 'False', while it should be set to True")
            assert not is_reboot_required_msg, (
                f"Wrong reboot required status set for Machines after kernel was changed\n{is_reboot_required_msg}")

        if child_cluster.tf_enabled():
            child_cluster.mos_check.check_cassandra_nodes_config(
                os_manager=os_manager,
                actualize_nodes_config=settings.TF_CASSANDRA_NODES_CLENAUP)
            child_cluster.mos_check.check_vrouter_pods('tf-vrouter-agent', os_manager=os_manager)

    is_maintenance_skip = child_cluster.is_skip_maintenance_set(
        cr_before=cr_before,
        target_clusterrelease=child_cluster.clusterrelease_version)

    # Cordon/drain stages are verified for patchrelease updates and for updates
    # that have the skip-maintenance flag set
    if is_patchrelease or is_maintenance_skip:
        child_cluster.check.check_upgraded_machines_cordon_drain_stages(
            skip_maintenance=is_maintenance_skip, reboot_expected=reboot_expected)
