import pytest
import yaml
import os

from si_tests.managers.openstack_client_manager import OpenStackClientManager
from si_tests.deployments.utils import file_utils
from si_tests import logger
from si_tests import settings
from si_tests.utils import waiters, utils

# Shared logger of the si_tests framework.
LOG = logger.logger

# Directory that contains this test module; the template path below is resolved relative to it.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))

# Heat template passed to create_stack() by create_host_servers().
TEMPLATE_PATH = file_utils.join(_THIS_DIR, "../lcm/templates/heat_stack_simple.yaml")


def test_prepare_openstack_cluster(kaas_manager):
    """Prepare the child OpenStack cloud before the HA freeze tests run.

    Steps:
        1. Dump the child cluster kubeconfig into the local file 'child_conf'
           and build an OpenStack client on top of it.
        2. Set the default quota class to unlimited (-1) for volumes,
           instances and cores.
        3. Remove every existing Heat stack named 'stack-<host name>' for the
           hosts reported by the cloud, because the freeze tests create
           stacks with exactly these names.

    :param kaas_manager: fixture used to resolve the target namespace/cluster
    """
    cluster_name = settings.TARGET_CLUSTER
    namespace_name = settings.TARGET_NAMESPACE
    ns = kaas_manager.get_namespace(namespace_name)
    child_cluster = ns.get_cluster(cluster_name)
    # Only the kubeconfig content is needed, the secret name is ignored.
    _, child_kubeconfig = child_cluster.get_kubeconfig_from_secret()
    with open('child_conf', 'w') as f:
        f.write(child_kubeconfig)
    openstack_client_manager = OpenStackClientManager(kubeconfig='child_conf')
    LOG.info("Unset quotas for volumes and instances")
    openstack_client_manager.quota.set(["--volumes", "-1", "--instances", "-1", "--cores", "-1",
                                        "--class", "default"])
    hosts = openstack_client_manager.host.list([])
    # A set gives constant-time membership checks in the loop below.
    stack_names = {s.get('Stack Name') for s in openstack_client_manager.stack.list([])}
    stacks_to_clean = []
    for host in hosts:
        hostname = host.get('Host Name')
        if not hostname:
            # Nothing to derive a stack name from.
            continue
        stack = 'stack-' + hostname
        # The host list is not filtered by service here, so the same host name
        # may be seen more than once; queue each stack only once, otherwise
        # the second delete would target an already removed stack.
        if stack in stack_names and stack not in stacks_to_clean:
            stacks_to_clean.append(stack)
    if stacks_to_clean:
        LOG.info(f"Found next stacks: {stacks_to_clean}. These stacks will be removed as their names will "
                 f"conflict with stacks created during this test")
        for s in stacks_to_clean:
            LOG.info(f"Removing stack {s}")
            openstack_client_manager.stack.delete([s, "-y", "--wait"])


# PRODX-51676
# @pytest.mark.usefixtures("collect_downtime_statistics")  # Should be used if ALLOW_WORKLOAD == True
# def test_ha_freeze_mariadb_server(kaas_manager):
#     """Freeze mysqld service with primary role
#
#     Executed only on MOSK cluster
#
#     Precondition - all expected pods and their replicas must be presented
#     The following scenario is executed
#
#     Scenario:
#         1. SSH to node
#         2. Freeze mysqld process
#         3. Check openstack main services are fully up
#         4. Check workloads can be created
#         5. Check pods are Ready
#
#     Expected result - all mysqld services restored successfully.
#     """
#     cluster_name = settings.TARGET_CLUSTER
#     namespace_name = settings.TARGET_NAMESPACE
#     ns = kaas_manager.get_namespace(namespace_name)
#     child_cluster = ns.get_cluster(cluster_name)
#     if child_cluster.is_mosk:
#         child_cluster.check.check_k8s_pods()
#         mariadb_pod = child_cluster.k8sclient.pods.list_starts_with('mariadb-server')[0]
#         mariadb_pod_name = mariadb_pod.name
#         k8s_node_name = mariadb_pod.node_name
#         machine = child_cluster.get_machine_by_k8s_name(k8s_node_name)
#         container_name_to_pause = "mariadb"
#         process_name_to_pause = "mysqld"
#         pod_id = machine._run_cmd(f"sudo crictl pods | grep {mariadb_pod_name} | cut -d ' ' -f 1",
#                                   ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE).stdout_str
#         container_id = machine._run_cmd(f"sudo crictl ps --name {container_name_to_pause} --pod {pod_id} "
#                                         f"| grep -v CONTAINER | cut -d ' ' -f 1",
#                                         ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE).stdout_str
#         mysqld_pid = child_cluster.ha.pids_of_process_in_container_for_containerd(machine,
#                                                                                   container_id,
#                                                                                   process_name_to_pause)
#         machine._run_cmd(f"sudo kill -STOP {mysqld_pid}",
#                          ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE)
#         LOG.info("Waiting for services down")
#         waiters.wait(lambda: not _check_openstack_services_status(child_cluster, services=['compute', 'volume']),
#                      interval=180, timeout=900)
#
#         child_cluster.check.check_k8s_pods()
#         LOG.info("Creating server on every compute")
#         create_host_servers(child_cluster)
#
#         LOG.info("Resume mysqld")
#         machine._run_cmd(f"sudo kill -CONT {mysqld_pid}",
#                          ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE)
#         LOG.info("Waiting all openstack services are up")
#         waiters.wait(lambda: _check_openstack_services_status(child_cluster, services=['compute', 'volume']),
#                      interval=60, timeout=600)
#         child_cluster.check.check_k8s_pods()
#     else:
#         LOG.info("\nHA tests to kill mariadb were skipped "
#                  "as child cluster isn't MOSK\n")
#         pytest.skip("HA tests to kill mariadb were skipped")


@pytest.mark.usefixtures("collect_downtime_statistics")  # Should be used if ALLOW_WORKLOAD == True
def test_ha_freeze_nova_scheduler(kaas_manager):
    """Freeze the nova-scheduler process and verify the cloud recovers.

    Executed only on MOSK cluster, skipped otherwise.

    Scenario:
        1. Check k8s pods
        2. Over SSH, find the first nova-scheduler pod, its container and
           the PIDs of the nova-scheduler process
        3. Send SIGSTOP to these PIDs
        4. Wait until the compute service list is no longer fully 'up'
        5. Create (and remove) a stack on every compute host
        6. Send SIGCONT to the PIDs; if this fails, look up and log the PIDs
           of the current container instead
        7. Wait until all compute services are 'up'
        8. Check k8s pods

    Expected result - all nova scheduler services restored successfully.
    """
    target_ns = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    child_cluster = target_ns.get_cluster(settings.TARGET_CLUSTER)

    if not child_cluster.is_mosk:
        LOG.info("\nHA tests to freeze nova-scheduler were skipped "
                 "as child cluster isn't MOSK\n")
        pytest.skip("HA tests to freeze nova-scheduler were skipped")

    child_cluster.check.check_k8s_pods()
    target_pod = child_cluster.k8sclient.pods.list_starts_with('nova-scheduler')[0]
    pod_name = target_pod.name
    machine = child_cluster.get_machine_by_k8s_name(target_pod.node_name)
    container_name = "nova-scheduler"
    process_name = "nova-scheduler"

    def _ssh(command):
        # Run a shell command on the node that hosts the pod.
        return machine._run_cmd(command, ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE)

    def _compute_is_up():
        return _check_openstack_services_status(child_cluster, services=['compute'])

    pod_id = _ssh(f"sudo crictl pods | grep {pod_name} | cut -d ' ' -f 1").stdout_str
    # The same lookup is reused later if the container got replaced meanwhile.
    find_container_cmd = (f"sudo crictl ps --name {container_name} --pod {pod_id} "
                          f"| grep -v CONTAINER | cut -d ' ' -f 1")
    container_id = _ssh(find_container_cmd).stdout_str
    pids = child_cluster.ha.pids_of_process_in_container_for_containerd(
        machine, container_id, process_name)

    _ssh(f"sudo kill -STOP {pids}")
    waiters.wait(lambda: not _compute_is_up(), interval=60, timeout=600)

    LOG.info("Creating server on every compute")
    create_host_servers(child_cluster)

    LOG.info("Resume nova-scheduler")
    try:
        _ssh(f"sudo kill -CONT {pids}")
    except Exception:
        # Resuming failed: report it and log the PIDs of the current container.
        LOG.info(f"Processes {pids} don't exist anymore.")
        container_id = _ssh(find_container_cmd).stdout_str
        pids = child_cluster.ha.pids_of_process_in_container_for_containerd(
            machine, container_id, process_name)
        LOG.info(f"New nova-scheduler PIDs: {pids}")

    LOG.info("Waiting all openstack services are up")
    waiters.wait(lambda: _compute_is_up(), interval=60, timeout=600)
    child_cluster.check.check_k8s_pods()


@pytest.mark.usefixtures("collect_downtime_statistics")  # Should be used if ALLOW_WORKLOAD == True
def test_ha_freeze_nova_conductor(kaas_manager):
    """Freeze the nova-conductor process and verify the cloud recovers.

    Executed only on MOSK cluster, skipped otherwise.

    Scenario:
        1. Check k8s pods
        2. Over SSH, find the first nova-conductor pod, its container and
           the PIDs of the nova-conductor process
        3. Send SIGSTOP to these PIDs
        4. Wait until the compute service list is no longer fully 'up'
        5. Create (and remove) a stack on every compute host
        6. Send SIGCONT to the PIDs; if this fails, look up and log the PIDs
           of the current container instead
        7. Wait until all compute services are 'up'

    Expected result - all nova conductor services restored successfully.
    """
    target_ns = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    child_cluster = target_ns.get_cluster(settings.TARGET_CLUSTER)

    if not child_cluster.is_mosk:
        LOG.info("\nHA tests to freeze nova-conductor were skipped "
                 "as child cluster isn't MOSK\n")
        pytest.skip("HA tests to freeze nova-conductor were skipped")

    child_cluster.check.check_k8s_pods()
    target_pod = child_cluster.k8sclient.pods.list_starts_with('nova-conductor')[0]
    pod_name = target_pod.name
    machine = child_cluster.get_machine_by_k8s_name(target_pod.node_name)
    container_name = "nova-conductor"
    process_name = "nova-conductor"

    def _ssh(command):
        # Run a shell command on the node that hosts the pod.
        return machine._run_cmd(command, ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE)

    def _compute_is_up():
        return _check_openstack_services_status(child_cluster, services=['compute'])

    pod_id = _ssh(f"sudo crictl pods | grep {pod_name} | cut -d ' ' -f 1").stdout_str
    # The same lookup is reused later if the container got replaced meanwhile.
    find_container_cmd = (f"sudo crictl ps --name {container_name} --pod {pod_id} "
                          f"| grep -v CONTAINER | cut -d ' ' -f 1")
    container_id = _ssh(find_container_cmd).stdout_str
    pids = child_cluster.ha.pids_of_process_in_container_for_containerd(
        machine, container_id, process_name)

    _ssh(f"sudo kill -STOP {pids}")
    waiters.wait(lambda: not _compute_is_up(), interval=60, timeout=600)

    LOG.info("Creating server on every compute")
    create_host_servers(child_cluster)

    LOG.info("Resume nova-conductor")
    try:
        _ssh(f"sudo kill -CONT {pids}")
    except Exception:
        # Resuming failed: report it and log the PIDs of the current container.
        LOG.info(f"Processes {pids} don't exist anymore.")
        container_id = _ssh(find_container_cmd).stdout_str
        pids = child_cluster.ha.pids_of_process_in_container_for_containerd(
            machine, container_id, process_name)
        LOG.info(f"New nova-conductor PIDs: {pids}")

    LOG.info("Waiting all openstack services are up")
    waiters.wait(lambda: _compute_is_up(), interval=60, timeout=600)
    # NOTE: the final pod readiness check is intentionally left disabled here:
    # child_cluster.check.check_k8s_pods()


@pytest.mark.usefixtures("collect_downtime_statistics")  # Should be used if ALLOW_WORKLOAD == True
def test_ha_freeze_neutron_server(kaas_manager):
    """Freeze the neutron-server processes and verify workloads still work.

    Executed only on MOSK cluster, skipped otherwise.

    Scenario:
        1. Check k8s pods
        2. Over SSH, find the first neutron-server pod, its container and
           the PIDs of the neutron-server process
        3. Send SIGSTOP to these PIDs
        4. Create (and remove) a stack on every compute host
        5. Send SIGCONT to the PIDs; if this fails, look up and log the PIDs
           of the current container instead
        6. Check k8s pods

    Expected result - all services restored successfully.
    """
    target_ns = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    child_cluster = target_ns.get_cluster(settings.TARGET_CLUSTER)

    if not child_cluster.is_mosk:
        LOG.info("\nHA tests to freeze neutron-server were skipped "
                 "as child cluster isn't MOSK\n")
        pytest.skip("HA tests to freeze neutron-server were skipped")

    child_cluster.check.check_k8s_pods()
    target_pod = child_cluster.k8sclient.pods.list_starts_with('neutron-server')[0]
    pod_name = target_pod.name
    machine = child_cluster.get_machine_by_k8s_name(target_pod.node_name)
    container_name = "neutron-server"
    process_name = "neutron-server"

    def _ssh(command):
        # Run a shell command on the node that hosts the pod.
        return machine._run_cmd(command, ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE)

    pod_id = _ssh(f"sudo crictl pods | grep {pod_name} | cut -d ' ' -f 1").stdout_str
    # The same lookup is reused later if the container got replaced meanwhile.
    find_container_cmd = (f"sudo crictl ps --name {container_name} --pod {pod_id} "
                          f"| grep -v CONTAINER | cut -d ' ' -f 1")
    container_id = _ssh(find_container_cmd).stdout_str
    pids = child_cluster.ha.pids_of_process_in_container_for_containerd(
        machine, container_id, process_name)

    _ssh(f"sudo kill -STOP {pids}")

    LOG.info("Creating server on every compute")
    create_host_servers(child_cluster)

    LOG.info("Resume neutron-server")
    try:
        _ssh(f"sudo kill -CONT {pids}")
    except Exception:
        # Resuming failed: report it and log the PIDs of the current container.
        LOG.info(f"Processes {pids} don't exist anymore.")
        container_id = _ssh(find_container_cmd).stdout_str
        pids = child_cluster.ha.pids_of_process_in_container_for_containerd(
            machine, container_id, process_name)
        LOG.info(f"New neutron-api PIDs: {pids}")

    child_cluster.check.check_k8s_pods()


@pytest.mark.usefixtures("collect_downtime_statistics")  # Should be used if ALLOW_WORKLOAD == True
def test_ha_freeze_keystone_api(kaas_manager):
    """Freeze apache2 inside the keystone-api container and wait for a new container.

    Executed only on MOSK cluster, skipped otherwise.

    Scenario:
        1. Check k8s pods
        2. Over SSH, find the first keystone-api pod, its container and
           the PIDs of the apache2 process
        3. Send SIGSTOP to these PIDs
        4. Wait until the container ID reported for the pod differs from
           the original one
        5. Create (and remove) a stack on every compute host
        6. Check k8s pods

    Expected result - all services restored successfully.
    """
    target_ns = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    child_cluster = target_ns.get_cluster(settings.TARGET_CLUSTER)

    if not child_cluster.is_mosk:
        LOG.info("\nHA tests to freeze keystone-api were skipped "
                 "as child cluster isn't MOSK\n")
        pytest.skip("HA tests to freeze keystone-api were skipped")

    child_cluster.check.check_k8s_pods()
    target_pod = child_cluster.k8sclient.pods.list_starts_with('keystone-api')[0]
    pod_name = target_pod.name
    machine = child_cluster.get_machine_by_k8s_name(target_pod.node_name)
    container_name = "keystone-api"
    process_name = "apache2"

    def _ssh(command):
        # Run a shell command on the node that hosts the pod.
        return machine._run_cmd(command, ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE)

    pod_id = _ssh(f"sudo crictl pods | grep {pod_name} | cut -d ' ' -f 1").stdout_str
    find_container_cmd = (f"sudo crictl ps --name {container_name} --pod {pod_id} "
                          f"| grep -v CONTAINER | cut -d ' ' -f 1")
    container_id = _ssh(find_container_cmd).stdout_str
    pids = child_cluster.ha.pids_of_process_in_container_for_containerd(
        machine, container_id, process_name)

    _ssh(f"sudo kill -STOP {pids}")

    def _container_replaced(previous_id):
        # True once crictl reports a different container ID for the pod.
        current_id = _ssh(find_container_cmd).stdout_str
        return current_id != previous_id

    waiters.wait(lambda: _container_replaced(previous_id=container_id), timeout=600)

    LOG.info("Creating server on every compute")
    create_host_servers(child_cluster)
    child_cluster.check.check_k8s_pods()


@pytest.mark.usefixtures("collect_downtime_statistics")  # Should be used if ALLOW_WORKLOAD == True
def test_ha_freeze_cinder_volume(kaas_manager):
    """Pause the cinder-volume container task and verify the cloud recovers.

    Executed only on MOSK cluster, skipped otherwise.

    Scenario:
        1. Check k8s pods
        2. Over SSH, find the first cinder-volume pod and its container,
           then resolve the container in the 'k8s.io' ctr namespace
        3. Pause the container task with ctr
        4. Wait until the volume service list is no longer fully 'up'
        5. Create (and remove) a stack on every compute host
        6. Resume the container task with ctr
        7. Wait until all volume services are 'up'
        8. Check k8s pods

    Expected result - all cinder volume services restored successfully.
    """
    target_ns = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    child_cluster = target_ns.get_cluster(settings.TARGET_CLUSTER)

    if not child_cluster.is_mosk:
        LOG.info("\nHA tests to freeze cinder volume were skipped "
                 "as child cluster isn't MOSK\n")
        pytest.skip("HA tests to freeze cinder volume were skipped")

    child_cluster.check.check_k8s_pods()
    target_pod = child_cluster.k8sclient.pods.list_starts_with('cinder-volume')[0]
    pod_name = target_pod.name
    machine = child_cluster.get_machine_by_k8s_name(target_pod.node_name)
    container_name = "cinder-volume"
    # Common prefix of every ctr invocation below.
    ctr = "sudo ctr --namespace k8s.io"

    def _ssh(command):
        # Run a shell command on the node that hosts the pod.
        return machine._run_cmd(command, ssh_key=settings.HA_TEST_PRIVATE_KEY_FILE)

    def _volume_is_up():
        return _check_openstack_services_status(child_cluster, services=['volume'])

    LOG.info(f"Accessing {machine.name}")
    pod_id = _ssh(f"sudo crictl pods | grep {pod_name} | cut -d ' ' -f 1").stdout_str
    container_id = _ssh(f"sudo crictl ps --name {container_name} --pod {pod_id} "
                        f"| grep -v CONTAINER | cut -d ' ' -f 1").stdout_str
    ctr_container_id = _ssh(f"{ctr} container ls | grep {container_id} | cut -d ' ' -f 1").stdout_str

    _ssh(f"{ctr} tasks pause {ctr_container_id}")
    waiters.wait(lambda: not _volume_is_up(), interval=60, timeout=600)

    LOG.info("Creating server on every compute")
    create_host_servers(child_cluster)

    LOG.info("Resume cinder-volume")
    _ssh(f"{ctr} tasks resume {ctr_container_id}")

    LOG.info("Waiting all openstack services are up")
    waiters.wait(lambda: _volume_is_up(), interval=60, timeout=600)
    child_cluster.check.check_k8s_pods()


def create_host_servers(child_cluster):
    """Create one Heat stack per compute host, then delete all of them.

    The child kubeconfig is written to the local file 'child_conf' and used
    to build the OpenStack client. Every host whose service is 'compute' and
    whose name does not contain 'ironic' gets a stack named
    'stack-<host name>' built from TEMPLATE_PATH. Once all stacks are
    created they are removed one by one.

    :param child_cluster: child cluster object providing the kubeconfig
    """
    _, kubeconfig_body = child_cluster.get_kubeconfig_from_secret()
    with open('child_conf', 'w') as conf_file:
        conf_file.write(kubeconfig_body)
    openstack_client_manager = OpenStackClientManager(kubeconfig='child_conf')

    LOG.info("Get compute hosts names")
    host_rows = openstack_client_manager.host.list([])
    key_pair = utils.generate_keys()

    # Stack name -> Heat parameters for every eligible compute host.
    params_by_stack = {}
    for row in host_rows:
        if row.get('Service') != 'compute' or 'ironic' in row.get('Host Name'):
            continue
        compute_name = row['Host Name']
        params_by_stack["stack-" + compute_name] = {
            "host_name": row['Zone'] + ":" + compute_name,
            "key_name": "keyha-" + compute_name,
            "image_name_1": openstack_client_manager.cirros_image_name,
            "ssh_public_key": f"ssh-rsa {key_pair['public']}",
            "ssh_private_key": key_pair["private"],
        }

    for name, params in params_by_stack.items():
        openstack_client_manager.create_stack(None, name, TEMPLATE_PATH,
                                              'child_conf', params, False)

    for name in params_by_stack:
        LOG.info(f"Removing stack {name}")
        openstack_client_manager.stack.delete([name, "-y", "--wait"])


def _check_openstack_services_status(child_cluster, services):
    """Tell whether all OpenStack services of the given types are 'up'.

    For every entry of ``services`` the command
    ``openstack <service> service list -f yaml`` is executed inside the first
    'keystone-client' pod of the 'openstack' namespace and its output is
    parsed as YAML.

    A query that raises or returns no data is treated as "not up". Without
    this, a cloud whose API does not answer at all would produce an empty
    services list and be reported as healthy, since ``all()`` over an empty
    sequence is True.

    :param child_cluster: child cluster object providing ``k8sclient``
    :param services: iterable of service types, e.g. ['compute', 'volume']
    :return: True only if every query returned data and every returned
             entry has State == 'up'; False otherwise
    """
    client_pod = child_cluster.k8sclient.pods.list(
        namespace="openstack",
        name_prefix='keystone-client')[0]
    services_list = []
    # Service types whose status could not be obtained at all.
    unknown_services = []
    for service in services:
        cmd_service_list = ['/bin/sh', '-c', f'PYTHONWARNINGS=ignore::UserWarning '
                                             f'openstack {service} service list -f yaml']
        try:
            data = yaml.safe_load(client_pod.exec(cmd_service_list))
        except Exception as e:
            # Best effort: a failing query must not abort the caller's wait loop.
            LOG.error(f"An error occured during services status check: {e}")
            data = None
        if data:
            services_list.extend(data)
        else:
            unknown_services.append(service)
    LOG.info(f"Services list: {services_list}")
    if unknown_services:
        LOG.info(f"No status data received for services: {unknown_services}")
    failed_services = [item for item in services_list if item.get('State') != 'up']
    if failed_services:
        LOG.info(f'Some services are not fully up:\n{yaml.dump(failed_services)}')
    return not failed_services and not unknown_services
