import re
import time

from kubernetes.client.rest import ApiException
from tabulate import tabulate

from retry import retry
from si_tests import settings
from si_tests import logger
from si_tests.utils import utils, waiters, exceptions, k8s_utils

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from si_tests.managers.kcm_manager import ClusterDeployment

LOG = logger.logger


class ClusterDeploymentCheckManager(object):
    """Readiness, pods/nodes health and deletion checks for a single cluster deployment."""

    # Passed as ``excluded_pods`` to the pods status check in check_k8s_pods()
    EXCLUDED_PODS = []
    # Passed as ``excluded_jobs`` to the jobs completion check in check_k8s_pods()
    EXCLUDED_JOBS = []
    # Error log lines already reported by introspect_cap_errors(), keyed by pod name.
    # NOTE: this is a class-level mutable default; check_cluster_readiness() rebinds
    # a fresh dict on the instance before it starts polling.
    _cap_pods_logs = {}

    def __init__(self, clusterdeployment: "ClusterDeployment"):
        self._clusterdeployment: "ClusterDeployment" = clusterdeployment

    @property
    def clusterdeployment(self) -> "ClusterDeployment":
        """Cluster deployment object passed to the constructor."""
        return self._clusterdeployment

    @property
    def k8sclient(self):
        """The ``k8sclient`` attribute of the managed cluster deployment."""
        return self._clusterdeployment.k8sclient

    def collect_machines_data(self):
        """Collect a short status summary for every machine of the cluster deployment.

        Missing or empty ``spec``, ``status`` and ``conditions`` fields are tolerated,
        so a machine that has no status populated yet does not break the collection.

        :return: list of dicts with the keys ``name``, ``provider_id``, ``phase``,
                 ``ready``, ``message`` and ``provider_message`` (one dict per machine)
        """
        machines = self.clusterdeployment.get_machines(raise_if_empty=False) or []
        machines_data = []
        for machine in machines:
            machine_data = machine.data
            machine_provider_id = (machine_data.get('spec') or {}).get('providerID') or ""
            machine_status = machine_data.get("status") or {}
            machine_phase = machine_status.get("phase")

            # 'conditions' may be absent or None until the status is populated
            machine_conditions = machine_status.get('conditions') or []
            # Here, "Ready" is not a status, but the condition name.
            machine_ready_condition = next(
                (x for x in machine_conditions if x.get("type") == "Ready"), None)
            if machine_ready_condition is not None:
                machine_condition_message = machine_ready_condition.get("message")
                machine_condition_ready = machine_ready_condition.get("status")
            else:
                # NOTE(review): the text mentions the *provider* although this is the
                # machine's own condition; kept as is in case something matches on it.
                machine_condition_message = "NO MACHINE PROVIDER CONDITION"
                machine_condition_ready = None

            machine_provider_status = machine_status.get(settings.CAPI_OPERATOR_APIVERSION) or {}
            machine_provider_conditions = machine_provider_status.get('conditions') or []
            # Here, "Ready" is not a status, but the condition name.
            machine_provider_ready_condition = next(
                (x for x in machine_provider_conditions if x.get("type") == "Ready"), None)
            if machine_provider_ready_condition is not None:
                machine_provider_message = machine_provider_ready_condition.get("message")
            else:
                machine_provider_message = "NO MACHINE PROVIDER CONDITION"

            # Collect data for status message
            machines_data.append({
                "name": machine.name,
                "provider_id": machine_provider_id,
                "phase": machine_phase,
                "ready": machine_condition_ready,
                "message": machine_condition_message,
                "provider_message": machine_provider_message,
            })
        return machines_data

    def show_machines_conditions(self):
        """Log a table with the machines status and return the collected data.

        :return: the list of dicts produced by collect_machines_data()
        """
        # (table header, key in the collected machine data)
        columns = (
            ("Machine", "name"),
            ("ProviderID", "provider_id"),
            ("Phase", "phase"),
            ("Ready", "ready"),
            ("Message", "message"),
            ("Provider message", "provider_message"),
        )
        machines_data = self.collect_machines_data()
        table_headers = [title for title, _ in columns]
        table_rows = []
        for entry in machines_data:
            table_rows.append([entry[key] for _, key in columns])
        # Show Machines status and not ready conditions
        status_msg = tabulate(table_rows, tablefmt="presto", headers=table_headers)
        LOG.info(f"Machines status:\n{status_msg}\n")
        return machines_data

    # TODO(va4st): Extend function with other dynamic resources like machines, control plane
    #  machine sets, etc (rely on template, provider, etc)
    def _get_clusterdeployment_readiness(self, expected_condition_cld_fails=None, expected_condition_svc_fails=None,
                                         interval=None):
        """Get conditions from Cluster Deployment

        Also logs the machines table (when the cluster has machines) and, when
        ``settings.ENABLE_INTROSPECT_CAP_ERRORS`` is set, the recent cap* controller errors.

        :param expected_condition_cld_fails: passed as ``expected_fails`` to the
            cluster deployment conditions check
        :param expected_condition_svc_fails: passed as ``expected_fails`` to the
            conditions check in 'services' mode
        :param interval: polling interval in seconds, used to limit how far back the
            cap* controller logs are read; None means no time limit is passed
        :rtype bool: True only if all conditions are true, False in other case
        """
        cluster_result = [
            self.clusterdeployment.are_conditions_ready(expected_fails=expected_condition_cld_fails,
                                                        verbose=True),
            self.clusterdeployment.are_conditions_ready(expected_fails=expected_condition_svc_fails,
                                                        verbose=True, mode='services')]
        # NOTE(va4st): If no machines in cluster (like in AKS by design or at all) - no needs to fill the log with
        # empty table
        if self.clusterdeployment.get_machines(raise_if_empty=False):
            self.show_machines_conditions()
        if settings.ENABLE_INTROSPECT_CAP_ERRORS:
            # 'interval' defaults to None, so guard the arithmetic; the extra 15 seconds
            # make consecutive log windows overlap instead of leaving gaps
            since_seconds = interval + 15 if interval is not None else None
            self.introspect_cap_errors(since_seconds=since_seconds)
        return all(cluster_result)

    def check_cluster_readiness(self, timeout=settings.CHECK_CLUSTER_READINESS_TIMEOUT, interval=60,
                                expected_condition_cld_fails=None,
                                expected_condition_svc_fails=None):
        """
        Check that overall clusterdeployment is Ready
        and no unexpected deployments failed
        :param timeout: timeout for waiter
        :type timeout: int
        :param interval: interval to check status
        :type interval: int
        :param expected_condition_cld_fails: dict with conditions like
            { <condition type>: <part of condition message to match> , }
        :type expected_condition_cld_fails: Dict[str]
        :param expected_condition_svc_fails: dict with conditions like
            { <condition type>: <part of condition message to match> , }
        :type expected_condition_svc_fails: Dict[str]

        :raises exceptions.TimeoutError: propagated from the waiter when the
            clusterdeployment does not become ready in time
        :return: None
        """
        LOG.info("Checking readiness for clusterdeployment "
                 f"'{self.clusterdeployment.namespace}/{self.clusterdeployment.name}'")

        timeout_msg = "Timeout waiting for clusterdeployment readiness"
        # Start every readiness check with an empty cache of reported cap* errors
        self._cap_pods_logs = {}
        # The waiter's timeout error is intentionally left to propagate to the caller
        waiters.wait(lambda: self._get_clusterdeployment_readiness(
            expected_condition_cld_fails,
            expected_condition_svc_fails,
            interval=interval),
            timeout=timeout,
            interval=interval,
            timeout_msg=timeout_msg)

        LOG.info("Cluster checked")

    @staticmethod
    def _check_service_condition_reason(obj: "ClusterDeployment", service_type, expected_reason, stop_on_fail=True):
        """
        Check that Helm Service condition reason is equal to the expected one
        :param obj: ClusterDeployment
        :param service_type: <str> condition type passed to obj.get_service_condition()
        :param expected_reason: <str> expected condition reason, e.g. 'Provisioned'
        :param stop_on_fail: <bool> raise RuntimeError when the condition status is 'False'
        :raises RuntimeError: if the condition status is 'False' and stop_on_fail is set
        :return: <bool> True/False
        """
        obj_status = obj.get_service_condition(service_type)
        if not obj_status:
            LOG.warning(f'"{service_type}" status condition not populated yet. '
                        f'Returning False as condition check result.')
            return False

        status = obj_status['status']
        actual_reason = obj_status['reason']
        message = obj_status['message']
        log_message = (f"\n > Expected '{service_type}' reason is {expected_reason}, "
                       f"actual reason is {actual_reason}.\n"
                       f" > Status: {status}\n")
        if message:
            log_message += f"Message: {message}"
        LOG.info(log_message)
        # Compare the status text instead of eval()-ing data received from the API:
        # eval() raised NameError for the valid condition status 'Unknown' and would
        # execute arbitrary expressions. Only an explicit 'False' is a failure.
        status_is_false = str(status).strip().lower() == 'false'
        if status_is_false and stop_on_fail:
            LOG.error(f"'{service_type}' status condition is {status}. Message: {message}")
            raise RuntimeError(message)
        return actual_reason == expected_reason

    @retry(RuntimeError, delay=40, tries=3, logger=LOG)
    def wait_service_condition(self, service_type, expected_status='Provisioned', timeout=1200, interval=15):
        """
        Wait until the service condition of the clusterdeployment has the expected reason
        :param service_type: <str> service condition type to check
        :param expected_status: <str> expected condition reason, e.g. 'Provisioned'
        :param timeout: timeout to wait
        :param interval: time between checks
        :return: None
        """
        LOG.info(f"Waiting for '{service_type}' service condition reason == '{expected_status}' ...")
        cluster_ref = f"{self.clusterdeployment.namespace}/{self.clusterdeployment.name}"
        failure_text = (f"Timeout for waiting expected '{service_type}' service condition status "
                        f"in cluster {cluster_ref} "
                        f"after {timeout} sec.")
        waiters.wait(
            lambda: self._check_service_condition_reason(
                self.clusterdeployment, service_type, expected_status),
            timeout=timeout,
            interval=interval,
            timeout_msg=failure_text)

    def check_actual_expected_pods(self,
                                   expected_pods=None,
                                   exclude_pods=None):
        """Wait until the actual pods of this cluster match the expected ones.

        The expected pods are taken from the cluster deployment unless they are
        passed explicitly. The whole check is skipped when
        ``settings.SKIP_EXPECTED_POD_CHECK`` is set.

        :param expected_pods: expected pods; fetched automatically when empty
        :param exclude_pods: pods to exclude while fetching the expected pods
        """
        if settings.SKIP_EXPECTED_POD_CHECK:
            LOG.info("Skipping expected pods checking")
            return

        LOG.info("Checking that all pods and their replicas are in place")

        pods_to_wait = expected_pods
        need_lookup = not pods_to_wait
        if need_lookup and exclude_pods:
            # Only the first item of the returned pair (the pods) is needed here
            pods_to_wait, _ = self.clusterdeployment.get_expected_objects(
                exclude_pods=exclude_pods)
        elif need_lookup:
            pods_to_wait = self.clusterdeployment.expected_pods

        k8s_utils.wait_expected_pods(self.k8sclient, expected_pods=pods_to_wait)

    def _get_replicasets_by_namespace(self, target_namespaces):
        rs_info = {}
        if isinstance(target_namespaces, str):
            target_namespaces = [target_namespaces]
        all_rs = self.k8sclient.replicasets.list_all()
        for rs in all_rs:
            if target_namespaces and rs.namespace not in target_namespaces:
                continue
            rs.spec = rs._read_cache.spec  # spec in _read_cache from ReplicaSetList contains required values
            rs_info.setdefault(rs.namespace, {})[f"{rs.kind}/{rs.uid}"] = rs
        return rs_info

    def check_k8s_pods(self, phases=('Running', 'Succeeded'),
                       target_namespaces=None,
                       timeout=settings.WAIT_PODS_READY_TIMEOUT,
                       interval=30, pods_prefix=''):
        """Wait till all expected pods for cluster are in specified
           phase and have Ready=True for all containers, then wait
           till the jobs are completed
        Args:
            phases: list of expected pod phases
            target_namespaces: namespace (str) or namespaces (list)
                               where pods should be checked; the expected
                               namespaces of the cluster deployment are
                               used when empty
            timeout: timeout to wait (applied to the pods wait and to
                     the jobs wait separately)
            interval: time between checks
            pods_prefix: passed as 'pods_prefix' to the pods check and
                         as 'jobs_prefix' to the jobs check
        Raises:
            TimeoutError: if pods or jobs are not in the expected state
                          after the timeout
        """

        def get_pods_info():
            """Return the current pods statuses or None if they are not acceptable yet"""
            rs_info = self._get_replicasets_by_namespace(target_namespaces)
            # 404 appeared when a just created Management cluster don't have /clusters api-resource yet
            # or sometimes resource can disappear at checking procedure, and we will receive 404
            try:
                pods_info = self.k8sclient.pods.check_pods_statuses(
                    target_namespaces=target_namespaces, phases=phases,
                    excluded_pods=self.EXCLUDED_PODS,
                    pods_prefix=pods_prefix, replicasets=rs_info)
            except ApiException as ex:
                if ex.status != 404:
                    raise
                LOG.error(f"Error happened while checking pods statuses: {ex}")
                return None
            return pods_info or None

        def wait_for_running_pods(pause=60):
            """Get the pods statuses and compare the restart counts

            :param pause: int, seconds, pause between restart checks
            """
            LOG.info("Check k8s pods status")
            pods_info = get_pods_info()
            if not pods_info:
                return False

            LOG.info(f"Wait for {pause} seconds before the second check")
            time.sleep(pause)

            LOG.info("Check k8s pods status one more time")
            pods_info_new = get_pods_info()
            if not pods_info_new:
                return False

            # Pods are stable only if nothing disappeared or restarted during the pause
            result = True
            for pod, info in pods_info.items():
                if pod not in pods_info_new:
                    LOG.warning(f"Pod {pod} disappeared after {pause} seconds")
                    result = False
                    continue
                cont_restarts_new = pods_info_new[pod]['containers_restarts']
                for container, restarts in info['containers_restarts'].items():
                    if container not in cont_restarts_new:
                        LOG.warning(f"Container {container} from pod {pod} "
                                    f"disappeared after {pause} seconds")
                        result = False
                        continue
                    restarts_new = cont_restarts_new[container]
                    if restarts_new > restarts:
                        LOG.warning(f"Container {container} from pod {pod} "
                                    f"has been restarted after {pause} seconds"
                                    f", current restarts: {restarts_new}")
                        result = False

            if result:
                LOG.info("All pods are in correct state")
            return result

        if not target_namespaces:
            target_namespaces = self.clusterdeployment.get_expected_namespaces()
        LOG.info(f"Checking k8s pods phase and containers status in namespaces {target_namespaces}")
        try:
            # Wait for pods status
            waiters.wait(wait_for_running_pods,
                         timeout=timeout, interval=interval)

            # Wait for jobs
            waiters.wait(
                lambda: self.k8sclient.jobs.check_jobs_completed(
                    target_namespaces=target_namespaces,
                    excluded_jobs=self.EXCLUDED_JOBS,
                    jobs_prefix=pods_prefix),
                timeout=timeout, interval=interval)
        except exceptions.TimeoutError:
            # Collect the pods that are still not in the expected state for the error message
            pods = self.k8sclient.pods.list_raw().to_dict()['items']
            if target_namespaces:
                if isinstance(target_namespaces, str):
                    target_namespaces = [target_namespaces]
                pods = [pod for pod in pods
                        if pod['metadata']['namespace'] in target_namespaces]
            utils.print_pods_status(pods)
            failed_pods = {}
            ready_text = " and Ready=True for " \
                         "each container"
            for pod, info in \
                    self.k8sclient.pods.get_pods_statuses(pods).items():
                # Check for the expected pod phase
                wrong_phase = info['phase'] not in phases
                # Check for containers status inside the non-completed pod
                # (compare the phase for equality, not as a substring of 'Succeeded')
                if wrong_phase or (info['phase'] != 'Succeeded'
                                   and False in info['containers'].values()):
                    failed_pods[pod] = info
            err = f"Timeout waiting for pods statuses. " \
                  f"After {timeout} sec next pods are not in {phases} phase" \
                  f"{ready_text}: {failed_pods}"

            # NOTE(review): this is the builtin TimeoutError, while check_k8s_nodes()
            # raises exceptions.TimeoutError; kept as is because callers may catch
            # the builtin one - confirm before unifying.
            raise TimeoutError(err)
        LOG.info("All pods and jobs are in correct state")

    def check_k8s_nodes(self, timeout=360, interval=10):
        """Wait until all k8s nodes of the cluster are reported as ready.

        :param timeout: timeout to wait
        :param interval: time between checks
        :raises exceptions.TimeoutError: with the statuses of the nodes which
            are still not 'True' after the timeout
        """
        LOG.info("Checking k8s nodes status")
        try:
            waiters.wait(
                lambda: self.k8sclient.nodes.all_ready(),
                timeout=timeout, interval=interval)
        except exceptions.TimeoutError:
            # Report only the nodes whose status is not 'True'
            not_ready_nodes = {}
            for node_name, node_status in self.k8sclient.nodes.list_statuses().items():
                if node_status != 'True':
                    not_ready_nodes[node_name] = node_status
            raise exceptions.TimeoutError(
                f"Timeout waiting for nodes to be Ready. "
                f"After {timeout} sec next nodes are not Ready: {not_ready_nodes}")
        LOG.info("All k8s nodes are Ready")

    def check_clusterdeployment_deleted(self, timeout=1800, interval=15):
        """
        Check removal process of clusterdeployment. Also verify leftovers absence after removal.
        :param timeout: timeout to wait, applied to the clusterdeployment wait and
            to the infracluster wait separately
        :param interval: time between checks
        :return: None
        """
        # (va4st): infracluster is mandatory here and should be already initialized.
        # It is read before waiting for the clusterdeployment removal.
        infracluster = self.clusterdeployment.clusterobject.infracluster

        waiters.wait(lambda: not self.clusterdeployment.present(),
                     timeout=timeout, interval=interval,
                     timeout_msg='Timeout waiting for cluster deletion')
        LOG.info(f"Cluster {self.clusterdeployment.name} has been deleted")

        def status():
            # Status line passed to the waiter as 'status_msg_function'
            msg = (f"Infracluster {infracluster.kind}:{infracluster.namespace}/{infracluster.name} "
                   f"is exists:{infracluster.exists()} and have readiness status:{infracluster.ready}")
            return msg
        LOG.info(f"Additionally check that infracluster "
                 f"{infracluster.resource_type}:{infracluster.namespace}/{infracluster.name} "
                 f"is removed.")
        # Use a dedicated timeout message so that a stuck infracluster is not
        # reported as a stuck cluster deletion
        waiters.wait(lambda: not infracluster.exists(verbose=True),
                     timeout=timeout, interval=interval,
                     timeout_msg='Timeout waiting for infracluster deletion',
                     status_msg_function=status)
        LOG.info(f"Infracluster {infracluster.namespace}/{infracluster.name} has been deleted")

    def delete_clusterdeployment_and_check(self, timeout=1800, interval=15):
        """
        Request the clusterdeployment removal and wait until it is completely deleted.
        :param timeout: timeout passed to check_clusterdeployment_deleted()
        :param interval: interval passed to check_clusterdeployment_deleted()
        :return: None
        """
        cld = self.clusterdeployment
        # (va4st): Initialize resource chain before removal
        _ = cld.clusterobject.infracluster.uid
        # (va4st): Trigger async delete
        cld.delete()
        cluster_ref = f"{cld.namespace}/{cld.name}"
        LOG.info(f"Removal request sent for cluster deployment {cluster_ref}")

        self.check_clusterdeployment_deleted(timeout, interval)

    def introspect_cap_errors(self, since_seconds=None):
        """Log the new error lines found in the logs of the cap* controller manager pods.

        Does nothing unless ``settings.ENABLE_INTROSPECT_CAP_ERRORS`` is set.
        Lines that were already reported are remembered per pod and skipped.

        :param since_seconds: passed to the pod ``get_logs()`` call
        """
        if not settings.ENABLE_INTROSPECT_CAP_ERRORS:
            # cap* controller manager logs check is not enabled
            return

        cap_name_pattern = re.compile(r"cap.*-controller-manager-.*")
        kcm_pods = self.clusterdeployment._manager.api.pods.list(namespace=settings.KCM_NAMESPACE)
        cap_pods = [pod for pod in kcm_pods if cap_name_pattern.match(pod.name)]

        for cap_pod in cap_pods:
            reported = self._cap_pods_logs.setdefault(cap_pod.name, [])

            # Glue indented continuation lines to the log entry they belong to
            entries = []
            for raw_line in cap_pod.get_logs(since_seconds=since_seconds).splitlines():
                if entries and raw_line.startswith((' ', '\t')):
                    entries[-1] += f"\n{raw_line}"
                else:
                    entries.append(raw_line)

            # Keep only the not yet reported 'E...' entries, skipping the periodic cluster checks
            fresh_errors = [
                entry for entry in entries
                if "context deadline exceeded" not in entry
                and entry.startswith('E')
                and entry not in reported
            ]

            reported.extend(fresh_errors)
            if not fresh_errors:
                continue
            log_lines = '\n'.join(fresh_errors[-4:])
            LOG.info(f"\n>>> ENABLE_INTROSPECT_CAP_ERRORS: "
                     f"Found the following errors in the pod '{cap_pod.name}' "
                     f"(4 latest errors max):\n\n{log_lines}\n")

    def wait_aks_machinepools_replicas_populated(self, timeout=300, interval=10):
        """Wait for machinepools created by aks will have in their statuses replicas count

        :param timeout: timeout to wait, applied to the system pool and to the
            user pool separately
        :param interval: time between checks
        :raises RuntimeError: if the cluster provider is not aks
        :return: None
        """
        if self.clusterdeployment.provider != utils.Provider.aks:
            e = (f"Calling specific aks-provider function while cluster provider is "
                 f"{self.clusterdeployment.provider}")
            LOG.error(e)
            raise RuntimeError(e)

        LOG.info(f"Waiting for {timeout} seconds while system machinepool will have replicas count in status")
        waiters.wait(lambda: self.clusterdeployment.clusterobject.infracluster.systempool.replicas,
                     timeout=timeout, interval=interval,
                     timeout_msg='Timeout waiting systempool replicas to be populated in status')
        LOG.info(f"Waiting for {timeout} seconds while user machinepool will have replicas count in status")
        waiters.wait(lambda: self.clusterdeployment.clusterobject.infracluster.userpool.replicas,
                     timeout=timeout, interval=interval,
                     timeout_msg='Timeout waiting userpool replicas to be populated in status')
