#    Copyright 2024 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import pytest
from si_tests import logger
from si_tests import settings

from si_tests.utils import exceptions

# Logger object taken from si_tests.logger; used in this module for info messages and banners.
LOG = logger.logger


@pytest.mark.usefixtures("introspect_distribution_not_changed")
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.parametrize("_", [f"CLUSTER_NAME={settings.TARGET_CLUSTER}"])
@pytest.mark.usefixtures('create_hoc_before_lcm_and_delete_after')
def test_runtime_migration_with_bad_machine(kaas_manager, _, show_step):
    """Migrate the runtime of all cluster Machines while one deliberately broken Machine gets stuck on migration

    The broken Machine is repaired once the migration is reported as stuck on it,
    after which the migration is expected to finish and the cluster to become ready.

    Scenario:
        1. Check init state cluster
        2. Make a cluster Machine broken (should not start docker and lcm-agent)
        3. Launch runtime migration process
        4. Wait until migration process is stuck on the broken Machine and fix this machine
        5. Ensure that migration process is completed
        6. Check Cluster and nodes status
    """
    # NOTE(review): the numbered Scenario lines above are kept verbatim; presumably show_step()
    # looks them up by number - confirm before rewording them.
    target_cluster_name, target_namespace_name = settings.TARGET_CLUSTER, settings.TARGET_NAMESPACE
    cluster = kaas_manager.get_namespace(target_namespace_name).get_cluster(target_cluster_name)

    # --- Step 1: the cluster must be healthy before anything is broken ---
    show_step(1)
    LOG.info(f"Check init state on the {cluster._cluster_type} cluster {cluster.namespace}/{cluster.name}")
    cluster.check.check_machines_status()
    cluster.check.check_cluster_readiness()
    cluster.check.check_k8s_nodes()

    # --- Step 2: pick a Machine and break it ---
    show_step(2)
    broken_machine = cluster.day2operations.get_machine_to_disable()
    broken_machine_name = broken_machine.name
    LOG.banner(f"Expected bad machine: '{broken_machine_name}'")

    # The day2-operations "broken reboot" helper is reused here to break the Machine,
    # so the runtime migration tests do not need a dedicated one
    cluster.day2operations.make_broken_reboot_for_day2_operations(broken_machine)

    # --- Step 3: start the migration towards the opposite runtime ---
    show_step(3)
    LOG.info(f"Start runtime migration process in cluster {cluster.namespace}/{cluster.name}")
    current_runtime = cluster.runtime.runtime
    assert current_runtime in ('docker', 'containerd'), (
        f"Cluster have wrong runtime {current_runtime} or migration in progress.")
    # docker <-> containerd; any other value maps to an empty target runtime
    opposite_runtime = {'docker': 'containerd', 'containerd': 'docker'}
    target_runtime = opposite_runtime.get(current_runtime, '')

    LOG.banner(f"Target runtime: {target_runtime}")
    all_machines = cluster.get_machines()
    cluster.runtime.partial_migrate(all_machines, target_runtime)

    # Budget 30 minutes per Machine: migrating a BM machine usually takes about 10-15 minutes,
    # but sometimes may take 25+ minutes
    stuck_wait_timeout = len(all_machines) * 1800

    # --- Step 4: the migration must get stuck on the broken Machine ---
    show_step(4)
    LOG.info(f"*** Expect LCM stuck for one of the following broken Machines: "
             f"{broken_machine_name}")
    try:
        # LCMStuckException is the expected outcome of this wait
        cluster.check.wait_migration_stuck(timeout=stuck_wait_timeout,
                                           expected_stuck_machine_names=[broken_machine_name],
                                           target_runtime=target_runtime)
    except exceptions.LCMStuckException as stuck_error:
        LOG.info(f"Got the expected condition for the broken Machine: {stuck_error}")
    else:
        # The wait returned without reporting a stuck Machine, which is a test failure
        raise Exception(f"Runtime migration should be failed, but it is successfully finished with broken LCMMachines, "
                        f"please check the status of LCMMachine '{broken_machine_name}'")

    # The broken Machine itself must be reported as stuck before it is repaired
    assert broken_machine.is_lcmmachine_stuck(), f"Target Machine '{broken_machine.name}' is still not stuck"

    # The matching day2-operations helper is reused to repair the Machine
    cluster.day2operations.fix_broken_reboot_for_day2_operations(machines=[broken_machine], start_services=False)

    # --- Step 5: with the Machine repaired, the migration has to complete ---
    show_step(5)
    LOG.info(f"Check cluster {cluster.namespace}/{cluster.name} runtime migration "
             f"will be completed after machine fix")
    cluster.check.check_runtime()
    LOG.banner("Runtime migration process is completed", sep="#")

    # --- Step 6: final health checks of Machines, Cluster, Pods and Nodes ---
    show_step(6)
    LOG.banner("Check cluster Machines readiness")
    cluster.check.check_machines_status()

    LOG.banner("Check Cluster conditions readiness")
    # Extend the configured readiness timeout by 1800 seconds
    readiness_timeout = settings.CHECK_CLUSTER_READINESS_TIMEOUT + 1800
    cluster.day2operations.check_cluster_readiness(exp_provider_status=False,
                                                   timeout=readiness_timeout)

    LOG.banner("Check cluster nodes count")
    cluster.check.check_cluster_nodes()

    LOG.banner("Check cluster Pods")
    cluster.check.check_k8s_pods()

    LOG.banner("Check cluster Nodes readiness")
    cluster.check.check_k8s_nodes()
