import cachetools.func as cachetools_func
from datetime import datetime
import json
import os
import pytest
import re
import yaml

import dataclasses
import datetime as dt
import kubernetes

from si_tests.deployments import openstack_deploy
from si_tests import logger
from si_tests.managers import openstack_manager
from si_tests import settings
from si_tests.utils import utils
from si_tests.utils import waiters

from future.backports.datetime import timedelta
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from si_tests.managers.kaas_manager import Cluster

LOG = logger.logger


@dataclasses.dataclass
class ClusterInfoBeforeUpdate:
    """Cluster state captured before the update is started.

    Instances are built either from live cluster data or from a previously
    saved JSON file, so every field must be JSON-serializable (datetime values
    are handled by json_serialize/json_deserialize).
    """
    # ClusterRelease version the cluster had before the update
    clusterrelease: str
    # Distributions read from the nodes, looked up by machine name;
    # None when they were not collected (non-baremetal providers)
    machines_distributions: dict
    # True when the update to the target release requires a reboot of the nodes
    reboot_required: bool
    # Output of dump_nodes_info(): "<node name>/<NWL name>" -> NodeWorkloadLock summary
    nodes_data_map: dict
    # Machines uptime collected before the update
    uptimes: dict
    # Value of the 'postpone distribution upgrade' flag read from the cluster
    postpone_distro_upgrade: bool
    # Fixed cluster resources described before the update
    cluster_resources: dict
    # Latest distribution available for the clusterrelease before the update ("" if not collected)
    latest_distro: str
    # Ceph version before the update; None when Ceph is not deployed
    ceph_version: str


@dataclasses.dataclass
class UpdateInfo:
    """Parameters of the cluster update that is being executed."""
    # Name of the ClusterRelease the cluster is updated to
    update_release_name: str
    # 'spec.version' of the target ClusterRelease, used to find the update plan object
    target_cr_version: str
    # True when the clusterrelease version matches the MOSK naming pattern
    is_mosk_release: bool
    # True when the update is a patch release update
    is_patchrelease: bool


def dump_nodes_info(nwll):
    """Summarize NodeWorkloadLock objects into a flat map.

    :param nwll: iterable of NodeWorkloadLock dicts; each one must provide
                 spec.nodeName, metadata.name, metadata.resource_version and status
    :return: dict "<node name>/<NWL name>" -> {"workloadlock_name", "workloadlock_state",
             "resource_version"}
    :raises AssertionError: if a NodeWorkloadLock has no state in its status
    """
    summary = {}
    for nwl in nwll:
        try:
            node_name = nwl["spec"]["nodeName"]
            metadata = nwl["metadata"]
            lock_name = metadata["name"]
            lock_state = nwl["status"].get("state")
            lock_version = metadata["resource_version"]
            assert lock_state is not None, f"Node {node_name} has empty workloadlock state: {lock_state}"
            # The key combines node and lock names, because a node may have
            # several NWLs with different names
            summary[f"{node_name}/{lock_name}"] = {
                "workloadlock_name": lock_name,
                "workloadlock_state": lock_state,
                "resource_version": lock_version,
            }
        except Exception:
            # Dump the whole input to simplify debugging, then propagate the error
            LOG.error(f'dump_nodes_info: nwll=\n{nwll}')
            raise
    return summary


def step_key(obj):
    """Return the '<id>: <name>' key of a step dict ('-' is used when 'id' is absent)."""
    step_id = obj.get('id', '-')
    step_name = obj['name']
    return f"{step_id}: {step_name}"


def json_serialize(obj):
    """'default' hook for json.dump(): encode datetime objects as {'__datetime': <ISO string>}.

    :raises TypeError: for any object that is not a datetime
    """
    if not isinstance(obj, datetime):
        raise TypeError(f'Unknown object type "{type(obj)}": {obj}')
    return {'__datetime': obj.isoformat()}


def json_deserialize(obj):
    """'object_hook' for json.load(): decode {'__datetime': <ISO string>} back to a datetime.

    Objects without the '__datetime' key are returned unchanged.
    """
    if '__datetime' not in obj:
        return obj
    iso_value = obj.get('__datetime')
    return datetime.fromisoformat(iso_value)


class UpdateChildClusterreleaseActions(object):
    """Day2 Operations manager"""

    def __init__(self, cluster: "Cluster"):
        self._cluster: "Cluster" = cluster
        self.update_info = None
        self.cluster_info_before_update = None

    @property
    def cluster(self) -> "Cluster":
        return self._cluster

    @property
    def kaas_manager(self) -> "Cluster":
        return self._cluster._manager

    @property
    @cachetools_func.ttl_cache(ttl=300)
    def k8sclient(self):
        """k8s client of the bound cluster.

        The getter is wrapped with ttl_cache(ttl=300), so the value obtained from the
        cluster is reused for up to 300 seconds before it is requested again.
        """
        bound_cluster = self._cluster
        return bound_cluster.k8sclient

    def get_os_manager(self):
        """Create an OpenStackManager that works with the cluster.

        The cluster kubeconfig is read from its secret and saved under
        settings.ARTIFACTS_DIR; the manager is created from the saved file.

        :return: openstack_manager.OpenStackManager instance
        """
        kubeconfig_name, kubeconfig_data = self.cluster.get_kubeconfig_from_secret()
        kubeconfig_path = os.path.join(settings.ARTIFACTS_DIR, kubeconfig_name)
        LOG.info("Save cluster kubeconfig to %s", kubeconfig_path)
        with open(kubeconfig_path, 'w') as kubeconfig_file:
            kubeconfig_file.write(kubeconfig_data)
        return openstack_manager.OpenStackManager(kubeconfig=kubeconfig_path)

    def reconfigure_coredns_for_mosk(self):
        """Adjust coredns in kube-system for a MOSK cluster with a deployed OpenStackDeployment.

        Does nothing when the cluster is not MOSK or when the OpenStackDeployment
        object named settings.OSH_DEPLOYMENT_NAME is not found (HTTP 404).

        :raises kubernetes.client.rest.ApiException: when reading the OpenStackDeployment
            fails with any status other than 404
        """
        if not self.cluster.is_mosk:
            return

        LOG.info("Child cluster name contains MOS pattern. "
                 "coredns ConfigMap is going to be adjusted.")
        timeouts = settings.OPENSTACK_DEPLOY_TIMEOUT
        os_manager = self.get_os_manager()
        try:
            os_manager.get_openstackdeployment(
                name=settings.OSH_DEPLOYMENT_NAME, read=True)
        except kubernetes.client.rest.ApiException as e:
            if e.status != 404:
                # Only 'not found' means that there is nothing to configure;
                # any other API error must not be hidden from the caller
                raise
            LOG.info("There is no MOS apps deployed.")
            return

        LOG.info("Configuring coredns in kube-system to work "
                 "with it.just.works domain")
        openstack_deploy.configure_dns_kubesys(
            os_manager,
            timeouts.get("3", 180)
        )

    def pre_update(self, update_release_name):
        """Collect the cluster state before the update and prepare the cluster for it.

        If update continuation is enabled (settings.KAAS_CHILD_CLUSTER_UPDATE_CONTINUATION)
        and settings.KAAS_CHILD_CLUSTER_UPDATE_INFO_PATH points to an existing file with data
        saved for the current Cluster version, 'self.update_info' and
        'self.cluster_info_before_update' are restored from that file and the method returns.

        Otherwise the method:
          - skips the test (pytest.skip) if the cluster already has 'update_release_name';
          - fills 'self.update_info' and 'self.cluster_info_before_update';
          - adds the stacklight label to worker machines when StackLight HA is enabled
            and less than 3 workers are labeled;
          - when settings.KAAS_CHILD_CLUSTER_DISTRO_UPGRADE is set, enables the postponed
            distribution upgrade and sets the latest distribution on the machines;
          - for baremetal, sets 'rebootIfUpdateRequires' on the default and control update groups;
          - saves the collected data to 'update_info.json' in settings.ARTIFACTS_DIR.

        :param update_release_name: name of the ClusterRelease the cluster is going to be updated to
        :raises AssertionError: if a label was not applied or the NodeWorkloadLocks list is empty
        """
        saved_info_path = settings.KAAS_CHILD_CLUSTER_UPDATE_INFO_PATH
        if (saved_info_path
                and settings.KAAS_CHILD_CLUSTER_UPDATE_CONTINUATION
                and os.path.isfile(saved_info_path)):
            LOG.info(f"Read update info from {saved_info_path}")
            with open(saved_info_path, 'r') as f:
                info = json.load(f, object_hook=json_deserialize)
            _update_info = UpdateInfo(**info["update_info"])
            _cluster_info_before_update = ClusterInfoBeforeUpdate(**info["cluster_info_before_update"])
            # Ensure that the file contains update data for the current clusterrelease version
            if _update_info.update_release_name == self.cluster.clusterrelease_version:
                self.update_info = _update_info
                self.cluster_info_before_update = _cluster_info_before_update
                LOG.banner(f"Cluster update will be continued for the target version "
                           f"'{self.update_info.update_release_name}' (triggered for {update_release_name})")
                return
            LOG.warning(f"Ignoring '{settings.KAAS_CHILD_CLUSTER_UPDATE_INFO_PATH}'. "
                        f"File contains data for Cluster version '{_update_info.update_release_name}', "
                        f"but the current Cluster version is '{self.cluster.clusterrelease_version}'")

        # ClusterRelease spec.version is used to find the correct update plan object
        target_cr_version = self.kaas_manager.get_clusterrelease(
            update_release_name).data.get("spec", {}).get("version")
        LOG.info(f"Target cluster version: {target_cr_version}")

        cr_before = self.cluster.clusterrelease_version

        if update_release_name == cr_before:
            msg = (f"Requested {update_release_name} is the same as current "
                   f"clusterrelease version {cr_before}, skipping update")
            LOG.info(msg)
            pytest.skip(msg)

        is_patchrelease = self.cluster.is_patchrelease_upgrade(clusterrelease_version_before=cr_before,
                                                               clusterrelease_version_after=update_release_name)

        is_mosk_release = bool(re.search(r"mos.?-.*", self.cluster.clusterrelease_version))

        self.update_info = UpdateInfo(
            update_release_name,
            target_cr_version,
            is_mosk_release,
            is_patchrelease)
        LOG.debug(f"Update info before update populated with version, "
                  f"target release: {self.update_info}")

        reboot_required = self.cluster.update_requires_reboot(
            cr_before=cr_before, target_clusterrelease=update_release_name)

        if reboot_required:
            LOG.info("Restart of nodes will be required during update")

        # TODO(ddmitriev): add condition for (is_maintenance_skip and not is_patchrelease)
        # to ensure that rebootRequired flag is NOT set for upgrade paths that are
        # 'major' upgrades but with skipMaintenance==True

        # Uptimes are not collected for BYO; keep an empty dict so that
        # ClusterInfoBeforeUpdate can still be created for this provider
        uptimes = {}
        if self.cluster.provider is not utils.Provider.byo:
            uptimes = self.cluster.get_machines_uptime()

        LOG.info("Collect child cluster fixed resources before update")
        lcm_type = self.cluster.data['status'].get(
            'providerStatus', {}).get('releaseRefs', {}).get(
            'current', {}).get('lcmType', '')
        LOG.info("LCM Type is {0}".format(lcm_type))
        cluster_resources = self.cluster.describe_fixed_resources()
        LOG.debug("Child cluster resources before update:\n{0}"
                  .format(yaml.dump(cluster_resources)))
        control_machine_types = self.kaas_manager.get_clusterrelease(
            cr_before).data['spec']['machineTypes']['control']
        ucp_tag_in_cr_before = {x['params']['ucp_tag'] for x in control_machine_types
                                if 'ucp_tag' in x['params']}

        # TODO (tleontovich) Delete after 2.11 as
        #  label will be added during cluster creation
        # Check if we need labels
        lma_label = {"key": "stacklight", "value": "enabled"}
        workers = self.cluster.get_machines(machine_type="worker")
        not_labeled = [x for x in workers if lma_label not in x.nodeLabels]
        lma_labeled = [x for x in workers if lma_label in x.nodeLabels]
        if self.cluster.sl_ha_enabled() and len(lma_labeled) < 3:
            LOG.info(f"Labeled nodes {lma_labeled}")
            LOG.info(f"Non labeled nodes {not_labeled}")
            for machine in not_labeled:
                LOG.info(f"Updating machine {machine} with {lma_label}")
                machine.add_labels([lma_label])
                # The lambda is called only inside this iteration, so binding 'machine' late is safe
                waiters.wait(lambda: machine.check_k8s_nodes_has_labels(),
                             timeout=300, interval=10)
                assert lma_label in machine.nodeLabels, \
                    f"Failed to update {machine} with {lma_label}"

        def _machine_distro(machine):
            # Distribution requested in the Machine spec (None if not set)
            return machine.data.get('spec', {}).get('providerSpec', {}).get('value', {}).get('distribution')

        latest_distro = ""
        if self.cluster.provider is not utils.Provider.byo:
            latest_distro = self.kaas_manager.get_latest_available_clusterrelease_distro(cr_before)
        if settings.KAAS_CHILD_CLUSTER_DISTRO_UPGRADE:
            machines = self.cluster.get_machines()
            # Skip if all machines are already at the latest allowed distro
            if not all(_machine_distro(m) == latest_distro for m in machines):
                self.cluster.set_postpone_distribution_upgrade()
                for machine in machines:
                    machine_name = machine.data.get('metadata', {}).get('name')
                    curr_distro = _machine_distro(machine)
                    LOG.info(f"Distribution for {machine_name} - {curr_distro}")
                    if curr_distro != latest_distro:
                        LOG.info(f"Setting {machine_name} to {latest_distro}")
                        machine.set_distribution(distribution=latest_distro)
            else:
                LOG.info(
                    f"All machine distros are already at the latest allowed version for the release - {latest_distro}")
        # Save this flag to check the distribution after the cluster upgrade if required.
        # Flag may be enabled in this test under "if settings.KAAS_CHILD_CLUSTER_DISTRO_UPGRADE" above,
        # or may be enabled from another job, so need to read it directly from the Child cluster.
        is_postpone_distribution_upgrade_enabled = self.cluster.is_postpone_distribution_upgrade_enabled
        LOG.info(f"Postponed distribution upgrade value: {is_postpone_distribution_upgrade_enabled}")
        machines_distributions = None
        if self.cluster.provider is utils.Provider.baremetal:
            machines_distributions = self.cluster.get_machines_distributions_from_nodes()

        nodes_data_map = {}
        if self.cluster.provider in utils.Provider.with_ceph():
            # Dump all WorkLoadLocks states before update procedure
            # Save list with NodeWorkLoadLocks dicts
            nwll = [nwl.data for nwl in self.cluster.get_nodeworkloadlocks()]
            assert nwll, f"NodeWorkLoadLocks list is empty in child cluster: {self.cluster.name}"
            nodes_data_map = dump_nodes_info(nwll)
            LOG.info(f"Current checks status before update: \n{yaml.dump(nodes_data_map)}")

        ceph_version = None
        if self.cluster.is_ceph_deployed:
            if self.cluster.workaround.skip_kaascephcluster_usage():
                ceph_version = self.cluster.get_miracephhealth_version()
            else:
                ceph_version = self.cluster.get_ceph_version()

        self.cluster_info_before_update = ClusterInfoBeforeUpdate(
            clusterrelease=cr_before,
            machines_distributions=machines_distributions,
            reboot_required=reboot_required,
            nodes_data_map=nodes_data_map,
            uptimes=uptimes,
            postpone_distro_upgrade=is_postpone_distribution_upgrade_enabled,
            cluster_resources=cluster_resources,
            latest_distro=latest_distro,
            ceph_version=ceph_version)

        LOG.debug(f"Update info before update populated "
                  f"with uptime, distro, ceph_version: {self.cluster_info_before_update}")

        if self.cluster.provider is utils.Provider.baremetal:
            LOG.info("Set rebootIfUpdateRequires to default and control update groups")
            default_ug = self.cluster.get_default_update_group()
            controlplane_ug = self.cluster.get_control_update_group()
            # rebootIfUpdateRequires is False by default which means machines
            # won't be rebooted even if rebootRequired is set for release.
            # Set rebootIfUpdateRequires to True for control plane machines and
            # machines that are in default update group to keep the same behavior
            # as in the previous versions
            default_ug.patch({"spec": {"rebootIfUpdateRequires": True}})
            controlplane_ug.patch({"spec": {"rebootIfUpdateRequires": True}})

        LOG.info("Cluster release before update {0}".format(cr_before))
        LOG.info('ucp_tag before update {0}'.format(ucp_tag_in_cr_before))
        LOG.info(("=" * 80) + "\n\nUpdating child cluster {0} "
                              "to the clusterrelease {1}\n\n"
                 .format(self.cluster.name, update_release_name))

        LOG.info("Save update info to artifacts/update_info.json")
        # Build the payload first, so a conversion error does not leave an empty file behind
        info = {
            "update_info": dataclasses.asdict(self.update_info),
            "cluster_info_before_update": dataclasses.asdict(self.cluster_info_before_update),
        }
        with open(os.path.join(settings.ARTIFACTS_DIR, 'update_info.json'), 'w') as f:
            json.dump(info, f, default=json_serialize)

    def base_readiness_check(self, os_transitional_replicas=False):
        """Check cluster services readiness between ClusterUpdatePlan steps.

        Requires 'self.update_info' to be set: call pre_update() from the test first.

        :param os_transitional_replicas: passed as is to os_manager.wait_os_resources()
        """
        # Nodes will not have updating status if there is no changes in ucp_tag or CR versions
        self.cluster.check.check_k8s_nodes()
        self.cluster.check.check_k8s_pods()
        # An update interrupted after some update plan step may leave nodes in 'Prepare' state
        self.cluster.check.check_machines_status(expected_status=['Ready', 'Prepare'])

        # OpenStack checks below are relevant for MOSK releases only
        if not self.update_info.is_mosk_release:
            return

        openstack = self.get_os_manager()
        controller_version = openstack.os_controller_version()
        LOG.info(f"OpenStack controller version: {controller_version}")

        openstack.wait_os_deployment_status(status="APPLIED",
                                            timeout=settings.OPENSTACK_LCM_OPERATIONS_TIMEOUT)
        openstack.wait_osdpl_services()

        LOG.info("Wait osdpl health status=Ready")
        openstack.wait_openstackdeployment_health_status(timeout=1800)
        LOG.info("Wait os jobs to success and pods to become Ready")
        openstack.wait_os_resources(os_transitional_replicas=os_transitional_replicas,
                                    timeout=1800)
        self.reconfigure_coredns_for_mosk()

    def post_update(self):
        """Verify the cluster state after the update.

        Uses 'self.update_info' and 'self.cluster_info_before_update' collected by pre_update()
        and runs the post-update checks in order: cluster nodes, NodeWorkloadLock resource
        versions, Ceph version, cluster readiness, helmbundles, upgrade stages, docker services
        and pods, k8s component versions, machines reboot, kernel versions, changed resources,
        reboot-required statuses, cordon/drain stages, distributions, etcd quota, runtime and
        diagnostic status. Part of the checks is executed only for specific providers or
        for MOSK releases.

        :raises AssertionError: when one of the inline checks fails
        """
        self.cluster.check.check_cluster_nodes()

        machines_distributions_before_update = self.cluster_info_before_update.machines_distributions
        reboot_required = self.cluster_info_before_update.reboot_required
        if self.cluster.provider is utils.Provider.baremetal:
            machines_distributions_after_update = self.cluster.get_machines_distributions_from_nodes()
            # Dictionary with Machine names in keys and boolean in values,
            # to reflect reboot expectations per each Machine
            # Depends on:
            #   - flag 'reboot_required'. If set, then all machines are expected to reboot.
            #   - OS distribution changed on nodes during upgrade.
            #     Reboot expected even if 'reboot_required' is not set.
            reboot_expected = {machine_name: (machines_distributions_after_update.get(machine_name) !=
                                              machines_distributions_before_update.get(machine_name)
                                              ) or reboot_required
                               for machine_name in machines_distributions_after_update.keys()}
        else:
            # Set reboot_required flag for non-BM machines
            reboot_expected = {machine.name: reboot_required for machine in self.cluster.get_machines()}
        # The same but for Node names
        reboot_expected_nodes = {machine.get_k8s_node_name(): reboot_expected.get(machine.name)
                                 for machine in self.cluster.get_machines()}

        # Check that all workloadlock objects were updated
        if self.cluster.provider in utils.Provider.with_ceph():
            nodes_data_map_before = self.cluster_info_before_update.nodes_data_map
            # Refresh nodeworkloadlock dict
            nwll = [nwl.data for nwl in self.cluster.get_nodeworkloadlocks()]
            assert nwll, f"NodeWorkLoadLocks list is empty in child cluster: {self.cluster.name}"
            nodes_data_map_after = dump_nodes_info(nwll)
            LOG.info(f"Current checks status after update: \n{yaml.dump(nodes_data_map_after)}")
            for node in nwll:
                node_name = node["spec"]["nodeName"]
                nwl_name = node["metadata"]["name"]
                nwl_id = f"{node_name}/{nwl_name}"
                if not (reboot_expected_nodes[node_name]):
                    LOG.debug(f"Skip checking NWL '{nwl_id}' because the node '{node_name}' was not expected to reboot")
                    continue

                # NOTE(vsaienko): The nodes_data_map_before is collected before triggering update, we have a case when
                # controller with LCM functionality added with update, in this case the data will not be present before
                # triggering update. Check only that resource version is changed for objects that were present before
                # update. Also check resource version change for objects that were in active state originally as in
                # other case the controller just ignore LCM actions for node.
                if nwl_id in nodes_data_map_before:
                    # Check that resourceVersion is updated
                    if nodes_data_map_before[nwl_id]["workloadlock_state"] == "active":
                        _fail_msg = (f"Resource version for nodeworkloadlock '{nwl_id}' was not updated, but expected. "
                                     f"It means that there are no changes were made for NodeWorkLoadLock object "
                                     f"and this is FAIL, because node has Reboot Required flag in true")
                        assert nodes_data_map_before[nwl_id]["resource_version"] != \
                               nodes_data_map_after[nwl_id]["resource_version"], _fail_msg

        # Check Ceph was updated or not
        if self.cluster.is_ceph_deployed:
            if self.cluster.workaround.skip_kaascephcluster_usage():
                ceph_version_after = self.cluster.get_miracephhealth_version()
            else:
                ceph_version_after = self.cluster.get_ceph_version()
            if self.cluster_info_before_update.ceph_version != ceph_version_after:
                LOG.info(f"Ceph cluster was updated from {self.cluster_info_before_update.ceph_version} "
                         f"to {ceph_version_after}")
            else:
                LOG.info(f"Ceph was not updated. Current "
                         f"version: {self.cluster_info_before_update.ceph_version}")

        # os_manager is created only for MOSK releases and used only under the same condition below
        os_manager = None
        if self.update_info.is_mosk_release:
            os_manager = self.get_os_manager()
            self.reconfigure_coredns_for_mosk()

        if self.update_info.is_patchrelease:
            # regarding https://mirantis.jira.com/browse/PRODX-31773
            readiness_timeout = settings.CHECK_CLUSTER_READINESS_TIMEOUT + 1800
            LOG.info(f"Use extra timeout {readiness_timeout} sec. for patch release cluster readiness")
        else:
            readiness_timeout = settings.CHECK_CLUSTER_READINESS_TIMEOUT

        self.cluster.check.check_cluster_readiness(timeout=readiness_timeout)
        self.cluster.check.check_helmbundles()
        self.cluster.check.check_k8s_nodes()
        if self.cluster.provider is utils.Provider.vsphere and not settings.KAAS_VSPHERE_IPAM_ENABLED:
            self.cluster.check.check_upgrade_stage_success(skipped_stages_names='Network prepared')
        else:
            self.cluster.check.check_upgrade_stage_success()
        # Check/wait for correct docker service replicas in cluster
        # We have renamed agent if ucp changes version
        ucp_worker_agent_name = self.cluster.check.get_ucp_worker_agent_name()
        self.cluster.check.check_actual_expected_docker_services(
            changed_after_upd={'ucp-worker-agent-x': ucp_worker_agent_name})
        self.cluster.check.check_actual_expected_pods()
        if self.cluster.provider is not utils.Provider.byo:
            self.cluster.check.check_no_leftovers_after_upgrade()

        if self.cluster.is_ceph_deployed:
            self.cluster.check.check_ceph_pvc()

        # check k8s version: kubelet and kube-proxy versions must match on every node
        failed = {}
        for node in self.cluster.k8sclient.nodes.list_all():
            node_info = node.read().status.node_info
            if node_info.kubelet_version != node_info.kube_proxy_version:
                failed[node.name] = "kubelet_version {0} doesn't match " \
                                    "kube_proxy_version {1} version".format(
                    node_info.kubelet_version,
                    node_info.kube_proxy_version)

        assert not failed, "k8s versions mismatch " \
                           "Details: {}".format(failed)
        # TODO(tleontovich) Check in 2.14 in BYO will have reboot
        if self.cluster.provider is not utils.Provider.byo:
            uptimes_after = self.cluster.get_machines_uptime(dump_reboot_list=True)
            self.cluster.check.check_machines_reboot(
                self.cluster_info_before_update.uptimes, uptimes_after, reboot_expected)
        else:
            LOG.info(f'Skip reboot checks as provider values is {utils.Provider.byo}')

        kernel_changed = {}
        if self.cluster.provider is utils.Provider.baremetal:
            LOG.info("Check repository url for child cluster")
            self.cluster.check.check_repository_url()
            LOG.info("Check kernel versions")
            self.cluster.check.check_actual_expected_kernel_versions(
                self.cluster_info_before_update.postpone_distro_upgrade)
            kernel_changed = self.cluster.check.expect_kernel_changed_between_versions(
                cr_previous=self.cluster_info_before_update.clusterrelease)

        self.cluster.store_k8s_artifacts()
        self.cluster.provider_resources.save_artifact()

        # Check for changed/missing resources
        self.cluster.check.check_k8s_resources_after_update(
            self.cluster_info_before_update.cluster_resources,
            reboot_expected_nodes=reboot_expected_nodes)
        LOG.info("\n*** Child cluster <{0}> "
                 "have been upgraded to the clusterrelease {1}"
                 .format(self.cluster.name, self.update_info.update_release_name))

        if self.cluster.provider is utils.Provider.baremetal:
            LOG.info("Check actual/expected reboot-required status for all machines")
            self.cluster.check.check_expected_actual_reboot_required_status()

        is_maintenance_skip = self.cluster.is_skip_maintenance_set(
            cr_before=self.cluster_info_before_update.clusterrelease,
            target_clusterrelease=self.cluster.clusterrelease_version)

        if self.update_info.is_mosk_release:
            if kernel_changed and not reboot_required:
                LOG.info("Will check for reboot required status is True for machines "
                         "due to kernel changed after upgrade")
                machines_statuses = self.cluster.get_machines_reboot_required_status()
                is_reboot_required_msg = ""
                for machine_name, machine_reboot_required_status in machines_statuses.items():
                    # Reboot is expected if a distribution was changed during upgrade,
                    # or if reboot_required flag was set for the cluster version
                    # in the kaasrelease 'supportedClusterReleases'
                    expect_machine_reboot = reboot_expected.get(machine_name)
                    if expect_machine_reboot and machine_reboot_required_status:
                        is_reboot_required_msg += (
                            f"Machine '{machine_name}' reboot should be executed during upgrade, but flag "
                            f"'Machine.status.providerStatus.reboot.required' is still 'True'\n")
                    if not expect_machine_reboot and not machine_reboot_required_status:
                        # Trailing newline keeps per-machine messages on separate lines
                        is_reboot_required_msg += (
                            f"Machine '{machine_name}' have a changed kernel version "
                            f"but disabled rebootRequired flag. 'Machine.status.providerStatus.reboot.required' "
                            f"is 'False', while it should be set to True\n")
                assert not is_reboot_required_msg, (
                    f"Wrong reboot required status set for Machines after kernel was changed\n"
                    f"{is_reboot_required_msg}")

            if self.cluster.tf_enabled():
                tf_manager = os_manager.tf_manager
                self.cluster.mos_check.check_cassandra_nodes_config(
                    os_manager=os_manager,
                    actualize_nodes_config=settings.TF_CASSANDRA_NODES_CLENAUP)
                if is_maintenance_skip and not tf_manager.is_vrouter_component_updated():
                    LOG.warning("vRouter pods weren't updated because skip_maintenance flag is True.")
                    tf_manager.update_tfvrouter_pods()
                    tf_manager.wait_tf_controllers_healthy(timeout=300)
                    tf_manager.wait_tfoperator_healthy(timeout=210)
                self.cluster.mos_check.check_vrouter_pods('tf-vrouter-agent', os_manager=os_manager)

        # Cordon/drain stages are checked for patch releases and for updates with skipped maintenance
        if self.update_info.is_patchrelease or is_maintenance_skip:
            self.cluster.check.check_upgraded_machines_cordon_drain_stages(
                skip_maintenance=is_maintenance_skip, reboot_expected=reboot_expected)

        if self.cluster.provider is utils.Provider.baremetal:
            if self.cluster_info_before_update.postpone_distro_upgrade and not self.update_info.is_patchrelease:
                self.cluster.check.check_inplace_distribution_upgrade_completed(
                    self.cluster_info_before_update.latest_distro)
                self.cluster.set_postpone_distribution_upgrade(enabled=False)
            else:
                # Ensure that distributions on nodes were not changed.
                # Distributions should not be changed if 'postponeDistributionUpdate' is disabled
                # Distributions should not be changed if 'postponeDistributionUpdate' is enabled,
                # but it is a patch update
                changed_nodes_distribution = [
                    f"'{m_name}': '{machines_distributions_before_update[m_name]}' -> "
                    f"'{machines_distributions_after_update[m_name]}'"
                    for m_name in machines_distributions_after_update.keys()
                    if machines_distributions_before_update[m_name] != machines_distributions_after_update[m_name]
                ]
                assert not changed_nodes_distribution, (
                        "Unexpected distribution changes on the hosts for the Machines:\n" +
                        '\n'.join(changed_nodes_distribution))
            self.cluster.check.check_actual_expected_distribution()
        etcd_quota = self.cluster.data['spec'].get(
            'providerSpec', {}).get('value', {}).get('etcd', {}).get('storageQuota', '')
        if etcd_quota:
            assert self.cluster.check.check_etcd_quota_applied(), (f"etcd storage quota from clusterSpec is: "
                                                                   f"{etcd_quota}, but not applied on target nodes.")
            self.cluster.check.check_etcd_storage_quota_negative()
        else:
            LOG.info("Etcd storage quota is not enabled. Skipping check")

        if settings.DESIRED_RUNTIME:
            self.cluster.check.compare_cluster_runtime_with_desired()

        if self.cluster.provider is utils.Provider.baremetal:
            self.cluster.check.check_diagnostic_cluster_status()

    def update_plan_get_steps_names_to_process(self, till_step_name=""):
        """Get update plan steps to process

        - ensure that no update plans for other cluster <versions> were used yet
        - check that there are no steps with wrong status. Expected statuses: 'Completed' or 'NotStarted'
        - return list of uncompleted steps to process

        return: list of dicts with <steps> to complete.
        """
        version = self.update_info.target_cr_version
        plan = self.cluster.update_plan_get(version)

        # Ensure that update plans for other versions were not used yet
        other_update_plans = [up for up in self.cluster.update_plan_list()
                              if up.name != plan.name]
        other_steps_messages = []
        for oup in other_update_plans:
            oup_status = oup.data.get('status', {}) or {}
            if oup_status.get('status') == 'Completed':
                # Skip checking update plans which already completed
                continue
            oup_status_steps = oup_status.get('steps', [])
            if oup_status_steps:
                for oup_status_step in oup_status_steps:
                    other_steps_messages.append(
                        f"[{oup.namespace}/{oup.name}] {oup_status_step.get('name')}: "
                        f"{oup_status_step.get('status')}, {oup_status_step.get('message')}")
        if other_steps_messages:
            other_steps_message = "\n".join(other_steps_messages)
            raise Exception(f"Cannot use ClusterUpdatePlan '{plan.namespace}/{plan.name}' because other update plans "
                            f"were activated for the cluster:\n{other_steps_message}")

        current_steps = plan.data.get("spec", {}).get("steps", [])
        assert current_steps, f"Update plan {plan.namespace}/{plan.name} list of steps is empty"

        current_steps_status = {step_key(step): step
                                for step in (plan.data.get("status", {}) or {}).get("steps", [])}

        # Skip steps that are already completed
        steps_to_update = []
        for step in current_steps:
            step_key_name = step_key(step)
            step_status = current_steps_status.get(step_key_name)
            if step_status:
                step_status_name = step_status.get('status')
                # Skip only 'Completed' steps without intermediate uncompleted steps
                if step_status_name == 'Completed' and not steps_to_update:
                    if step['commence']:
                        LOG.info(f"ClusterUpdatePlan step '{step_key_name}' is Completed, skipping")
                        continue
                    else:
                        raise Exception(f"ClusterUpdatePlan step '{step_key_name}' is Completed, but 'commence' flag "
                                        f"of this step is set to 'false' which is unexpected")
                elif step_status_name != 'NotStarted':
                    step_status_message = step_status.get('message')
                    raise Exception(f"ClusterUpdatePlan step '{step_key_name}' has unexpected status "
                                    f"'{step_status_name}': {step_status_message}")
            steps_to_update.append(step)

        # If settings.KAAS_CHILD_CLUSTER_UPDATE_PLAN_TILL_STEP_NAME is not empty,
        # then check that this step name/id exists in the update plan steps
        if till_step_name:
            stop_on_step_names = [step_key(step) for step in steps_to_update
                                  if step.get('id') == till_step_name
                                  or step.get('name') == till_step_name]
            if len(stop_on_step_names) == 1:
                stop_on_step_name = stop_on_step_names[0]
                LOG.info(f"KAAS_CHILD_CLUSTER_UPDATE_PLAN_TILL_STEP_NAME: "
                         f"Cluster update will be stopped after the update plan step '{stop_on_step_name}'")
            elif stop_on_step_names:
                raise Exception(f"Multiple steps found in the update plan '{plan.namespace}/{plan.name}' "
                                f"for the specified KAAS_CHILD_CLUSTER_UPDATE_PLAN_TILL_STEP_NAME: "
                                f"{stop_on_step_names}")
            else:
                raise Exception(f"No such incompleted step name/id found in the update plan '{plan.namespace}/"
                                f"{plan.name}' for the specified KAAS_CHILD_CLUSTER_UPDATE_PLAN_TILL_STEP_NAME: "
                                f"'{settings.KAAS_CHILD_CLUSTER_UPDATE_PLAN_TILL_STEP_NAME}'")

        return steps_to_update

    def create_estimated_time_list_from_update_plan(self, target_cr_version=None):
        """Return list of dicts with step ids and estimated update time values

        The update plan is requested by 'target_cr_version' with everything after the first '+'
        cut off. Steps from the plan spec that have no (or an empty) 'duration.estimated' value
        are not included. The resulting list is also written to the log as YAML.

        param target_cr_version: target cluster release version, mandatory despite the default
        raises AssertionError: if 'target_cr_version' is empty
        return: list of dicts {'step_name': <step id>, 'estimated_time': <estimated value>}
        """
        assert target_cr_version, ("For correct estimated values processing it is obligatory "
                                   "to provide 'target_cr_version' to this function")
        update_plan_version = target_cr_version.split('+')[0]
        plan = self.cluster.update_plan_get(update_plan_version)
        # dict.get() returns its default only for absent keys: a key which is present
        # but set to null comes back as None, so fall back explicitly with 'or'
        plan_spec = plan.data.get('spec') or {}
        steps_estimated_list = []
        for step in plan_spec.get('steps') or []:
            estimated_time = (step.get('duration') or {}).get('estimated', "")
            if estimated_time:
                steps_estimated_list.append(
                    {'step_name': step.get('id'), 'estimated_time': estimated_time})
        LOG.info(f"\n{yaml.dump(steps_estimated_list)}")

        return steps_estimated_list

    def compare_estimated_time_list_with_duration_time(self, steps_estimated_list=None, target_cr_version=None):
        """Check that step duration time fits into the estimated update time

        For each item of 'steps_estimated_list' the first status step with the same id is taken
        from the update plan, and its 'duration' is compared with the item 'estimated_time':
          - PASSED: duration is not bigger than the estimated time
          - WARNING: duration is bigger than the estimated time, but by no more than 20%
            (by no more than 5 minutes if the estimated time is 0)
          - FAILED: duration exceeds the estimated time by more than that
        Items of 'steps_estimated_list' are updated in place with 'checked' and 'duration_time' keys.
        The resulting list is written to the log as YAML.

        param steps_estimated_list: list of dicts with 'step_name' and 'estimated_time' keys
        param target_cr_version: target cluster release version, the part after the first '+' is cut off
        raises AssertionError: if a mandatory argument is empty, the update plan has no status,
                               a step is not found in the status, or utils.parse_time_value()
                               returns an empty result for a duration/estimated value
        raises Exception: if some steps are FAILED and
                          settings.SI_IGNORE_UPDATE_PLAN_ESTIMATED_STEP_TIMEOUT_FAIL is not set
        return: nothing
        """
        assert steps_estimated_list, ("For correct estimated values processing it is obligatory "
                                      "to provide 'steps_estimated_list' value")
        assert target_cr_version, ("For correct estimated values processing it is obligatory "
                                   "to provide 'target_cr_version' to this function")
        update_plan_version = target_cr_version.split('+')[0]
        update_plan = self.cluster.update_plan_get(update_plan_version).data.get('status', {})
        assert update_plan, "Cluster update plan doesn't have 'Status' field after update"
        # 'steps' may be present but null, then .get() returns None instead of the default
        steps_status_list = update_plan.get('steps') or []
        for step in steps_estimated_list:
            # Tolerate status steps without an 'id' key and report a missing step explicitly
            # instead of failing with a bare KeyError/IndexError
            step_duration = next((step_after for step_after in steps_status_list
                                  if step_after.get("id") == step["step_name"]), None)
            assert step_duration is not None, (f"Step '{step['step_name']}' is not found in the status "
                                               f"of the update plan '{update_plan_version}'")
            parsed_time_dur = utils.parse_time_value(step_duration.get("duration", ""))
            assert parsed_time_dur, "No duration field found in Status"
            step["checked"] = 'PASSED'
            duration = dt.timedelta(hours=parsed_time_dur["hours"],
                                    minutes=parsed_time_dur["minutes"],
                                    seconds=parsed_time_dur["seconds"])
            step["duration_time"] = str(duration)
            parsed_time_est = utils.parse_time_value(step["estimated_time"])
            assert parsed_time_est, "No estimated field found in Status"
            estimated = dt.timedelta(hours=parsed_time_est["hours"],
                                     minutes=parsed_time_est["minutes"],
                                     seconds=parsed_time_est["seconds"])
            if duration > estimated:
                # Allow exactly 20% on top of the estimated time. Do not floor the estimate to whole
                # hundreds of seconds first: that drops the allowance to zero for estimates under 100s.
                # NOTE(vdrok): if estimated time is 0, consider 5 minutes to be overtime
                allowed = (estimated + estimated / 5) or dt.timedelta(minutes=5)
                if duration > allowed:
                    step["checked"] = 'FAILED'
                else:
                    step["checked"] = 'WARNING'

        LOG.info(f"\n{yaml.dump(steps_estimated_list)}")

        warn_steps = [step for step in steps_estimated_list if step["checked"] == 'WARNING']
        if warn_steps:
            LOG.warning(f"Some steps is bigger than estimated time but less than 20%: \n{yaml.dump(warn_steps)}")

        failed_steps = [step for step in steps_estimated_list if step["checked"] == 'FAILED']
        if failed_steps:
            # Build the message only when there is something to report
            _fail_msg = (f"Duration time is bigger more than 20% "
                         f"of estimated value for steps: \n{yaml.dump(failed_steps)}")
            if settings.SI_IGNORE_UPDATE_PLAN_ESTIMATED_STEP_TIMEOUT_FAIL:
                LOG.warning(_fail_msg)
            else:
                raise Exception(_fail_msg)
