import copy
import time
import yaml

import cachetools.func as cachetools_func
from collections import Counter
from si_tests import logger
from si_tests import settings
from si_tests.deployments.utils import kubectl_utils
from si_tests.managers.machine_deletion_policy_manager import check_machine_deletion_policy
from si_tests.utils import utils, waiters, exceptions
from kubernetes.client.rest import ApiException

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from si_tests.managers.kaas_manager import Cluster

# Deployment name read from the test settings (OSH_DEPLOYMENT_NAME).
# NOTE(review): presumably the name of the OpenStackDeployment object - confirm against its users.
osdpl_name = settings.OSH_DEPLOYMENT_NAME


# Module-wide logger shared by all methods below
LOG = logger.logger


class Day2OperationsManager(object):
    """Day2 Operations manager"""

    def __init__(self, cluster: "Cluster") -> None:
        """Bind the manager to the Cluster object that all operations below are run against."""
        self._cluster: "Cluster" = cluster

    @property
    def cluster(self) -> "Cluster":
        """Cluster object passed to the constructor."""
        return self._cluster

    @property
    @cachetools_func.ttl_cache(ttl=300)
    def k8sclient(self):
        """Kubernetes client of the managed Cluster, memoized for 300 seconds.

        NOTE(review): 'ttl_cache' wraps the underlying method, so the cache is keyed on 'self'
        and keeps a reference to every manager instance until its entry expires or is evicted.
        Confirm that this is intended.
        """
        return self._cluster.k8sclient

    def get_machine_to_disable(self):
        """
        Return the only Machine labeled 'si-role/node-for-disable'.

        The Machine is looked up among all Machines of the Cluster. An AssertionError
        is raised when no Machine, or more than one Machine, carries the label.
        """
        disable_label = 'si-role/node-for-disable'
        labeled = [
            item for item in self.cluster.get_machines()
            if disable_label in item.data.get('metadata', {}).get('labels', {})
        ]
        # TODO(ddmitriev): for now, only one Machine is available to run the 'disable' tests
        # on the same Cluster.
        # To make it possible to use different Machines for different runs on the same Cluster,
        # we can do the following:
        # 1. Label more than one Machine with 'si-role/node-for-disable'
        # 2. Select only one Machine with such label per test, and use this Machine for disable test
        # 3. During the test, label the selected Machine with another label, for example:
        #    'si-role/selected-node-for-disable'
        # 4. Add a new method like 'get_selected_machine_to_disable', which will search a node with
        #    label 'si-role/selected-node-for-disable'
        # 5. Use this new method in the enable/replace tests, where the Cluster Machines operations
        #    are restored
        # With this approach, 'disable' tests get a Machine from some labeled set of Machines,
        # but 'enable'/'replace' tests get the selected Machine because it was labeled.
        # Also need to consider removing both labels after tests, to not use the same Machine twice
        found = len(labeled)
        assert found > 0, "No Machines with label 'si-role/node-for-disable' found"
        assert found == 1, (
            "Multiple Machines with label 'si-role/node-for-disable' found, while expected only 1 Machine")
        return labeled[0]

    def get_storage_candidate_machines(self, exclude_machine_names=None):
        """
        Return Machines that may receive Ceph roles from a disabled Machine.

        A Machine qualifies when its name is not listed in 'exclude_machine_names',
        it is not disabled, and it has the label 'si-role/storage-candidate-node'.
        """
        candidate_label = 'si-role/storage-candidate-node'
        skipped_names = exclude_machine_names or []

        # First pass: drop excluded and disabled Machines
        active_machines = [
            item for item in self.cluster.get_machines()
            if not (item.name in skipped_names or item.is_disabled())
        ]

        # Second pass: keep only the Machines labeled as storage candidates
        return [
            item for item in active_machines
            if candidate_label in item.data.get('metadata', {}).get('labels', {})
        ]

    def get_stacklight_candidate_machines(self):
        """
        Return Machines that may receive StackLight nodeLabels from disabled Machines.

        These are the Machines which are not disabled and do not yet have
        the nodeLabel {"key": "stacklight", "value": "enabled"}.
        """
        all_machines = self.cluster.get_machines()
        switched_off = [item for item in all_machines if item.is_disabled()]
        stacklight_label = [{"key": "stacklight", "value": "enabled"}]

        suitable = []
        for item in all_machines:
            if item.has_nodelabels(stacklight_label):
                continue
            if item in switched_off:
                continue
            suitable.append(item)
        # TODO(ddmitriev): consider to select suitable Machines for SL nodeLabels using additional Machine label
        #                  in the child_data
        return suitable

    def disable_machine(self, machine):
        """
        Power off the specified Machine and disable LCM operations for it.

        Order of actions: power off the bare metal host, wait until the host stops
        answering ICMP, sleep for 120 seconds, disable the Machine, and wait for
        the 'disabled' state with wait_machine_disable().
        """
        LOG.banner(f"Disable Machine '{machine.name}'")

        # The host is shut down before LCM operations are disabled
        LOG.info(f"Power off the Machine '{machine.name}'")
        machine.set_baremetalhost_power(online=False)

        # Public IP is preferred, internal IP is the fallback
        ping_address = machine.public_ip or machine.internal_ip
        LOG.info(f"Wait until machine '{machine.name}' is no longer available via ICMP on IP:{ping_address}")
        waiters.wait(lambda: not waiters.icmp_ping(ping_address), interval=5, timeout=600)

        LOG.info("Machine for disable were powered off, sleeping for couple of minutes")
        time.sleep(120)

        LOG.info(f"Disable LCM operations for Machine {machine.namespace}/{machine.name}")
        machine.disable()

        self.wait_machine_disable(machine)

    def enable_machine(self, machine):
        """
        Enable LCM operations for the specified Machine and power it on.

        Order of actions: enable the Machine, power on the bare metal host, wait until
        the host answers ICMP, and wait for the 'enabled' state with wait_machine_enable().
        """
        LOG.info(f"Enable LCM operations for Machine {machine.namespace}/{machine.name}")
        machine.enable()

        # Bring the bare metal host back online
        machine.set_baremetalhost_power(online=True)

        # Public IP is preferred, internal IP is the fallback
        ping_address = machine.public_ip or machine.internal_ip
        LOG.info(f"Wait until machine {machine.name} becomes available via ICMP on IP:{ping_address}")
        waiters.wait(lambda: waiters.icmp_ping(ping_address), interval=5, timeout=1800)

        self.wait_machine_enable(machine)

        # TODO(ddmitriev):
        # 1. Wait Machines readiness (timeout must be the same as for Machine deploy)
        # 2. Check LCMMachine release version (spec.release and status.release
        #                                      are the same as in all other LCMMachines)

    def wait_machine_disable(self, machine):
        """
        Wait until the Machine is reported as disabled and verify the related objects.

        Checks, in order: evacuation status 'disable', 'haltAgent' flag in the LCMMachine spec,
        Machine status 'Disabled', absence of the node in the MKE nodes list, presence of
        the NodeDisableNotification, and removal of the K8sNode from the cluster.
        """
        LOG.banner("Wait until NodeDisableNotifications are created for 'disabled' Machine", sep='-')
        self.cluster.check.wait_evacuation_status(machine.name, 'disable')

        lcm_object = self.cluster.get_cluster_lcmmachine(name=machine.name, namespace=machine.namespace)
        agent_halted = lcm_object.data.get('spec', {}).get('haltAgent', False)
        assert agent_halted, f"agent of LCMMachine {lcm_object.name} is not halted"

        machine.wait_for_status_presence("Disabled")

        LOG.info("Verifying that docker node is absent")
        k8s_node_name = machine.get_k8s_node_name()
        mke_node_list = self.cluster.mkedashboardclient.get_nodes()
        if any(entry['Description']['Hostname'] == k8s_node_name for entry in mke_node_list):
            raise Exception(f"Node {k8s_node_name} is present in MKE nodes list but it should not be present "
                            f"because LCMMachine has evacuation disable status")

        LOG.info("Verifying that nodedisablenotification is created")
        notification = self.cluster.get_nodedisablenotification(k8s_node_name)
        assert notification is not None, (
            f"NodeDisableNotification {k8s_node_name} is not created but it is expected to be created")

        self._wait_machine_disabled_nodes_deleted(machine)

    def _wait_machine_disabled_nodes_deleted(self, machine):
        """Ensure that k8s nodes are actually deleted from the k8s cluster

        Waits up to 600 seconds until the K8sNode of the Machine disappears. On timeout:
        - if the K8sNode has no deletion timestamp, the original TimeoutError is re-raised;
        - otherwise, if 'cluster.workaround.prodx_40036()' is true, the workaround for
          PRODX-40036 and PRODX-40367 is applied;
        - otherwise an Exception is raised.
        """
        LOG.banner("Wait for K8sNodes removal from cluster for 'disabled' Machine", sep='-')
        node_name = machine.get_k8s_node_name()
        try:
            LOG.info(f"Waiting until K8sNode '{node_name}' is deleted from cluster")
            waiters.wait(lambda: not self.k8sclient.nodes.present(node_name),
                         interval=30, timeout=600)
            LOG.info(f"K8sNode {node_name} is absent in the Cluster {self.cluster.namespace}/{self.cluster.name}")
        except exceptions.TimeoutError:
            LOG.error(f"K8sNode '{node_name}' was not deleted after Machine '{machine.name}' was disabled")
            k8s_node = machine.get_k8s_node()
            node_metadata = k8s_node.data.get('metadata') or {}
            # Check the value, not just the key: the snake_case key suggests a client-model dump,
            # where 'deletion_timestamp' is present with a None value for nodes not being deleted.
            if not node_metadata.get('deletion_timestamp'):
                LOG.error(f"K8sNode '{node_name}' has not been marked for deletion, it's a bug")
                raise

            # check that clusterrelease < mosk-17-1-1 , W/A should not be required for the next releases
            if self.cluster.workaround.prodx_40036():
                LOG.warning("Apply workaround for PRODX-40036 and PRODX-40367")
                self._apply_workaround_prodx_40036_and_40367(machine)
            else:
                LOG.warning("Skip workaround for PRODX-40036 and PRODX-40367")
                raise Exception(f"K8sNode {k8s_node.name} was not deleted "
                                f"after disabling Machine {machine.name}")

    def _apply_workaround_prodx_40036_and_40367(self, machine):
        """Apply workarounds for a K8sNode that is stuck in deletion.

        1. PRODX-40036: drop the finalizers from the K8sNode which still has the openstack
           finalizer, then wait until the node disappears. An Exception is raised when
           the node does not have that finalizer.
        2. PRODX-40367: delete the NodeWorkloadLock 'openstack-<node name>' if it exists
           and wait until it disappears.
        """
        openstack_finalizer = 'lcm.mirantis.com/openstack-controller.node-finalizer'
        k8s_node = machine.get_k8s_node()
        current_finalizers = k8s_node.data.get('metadata', {}).get('finalizers', [])

        # 1. PRODX-40036
        if openstack_finalizer not in current_finalizers:
            raise Exception(f"K8sNode '{k8s_node.name}' don't have an openstack finalizer but was not deleted, "
                            f"looks like a bug")

        LOG.warning(f"Workaround PRODX-40036 for K8sNode {k8s_node.name}: remove openstack finalizer")
        try:
            # The whole 'finalizers' field is patched with None, not only the openstack entry
            k8s_node.patch({'metadata': {'finalizers': None}})
        except Exception as err:
            # Best effort: the node may disappear while it is being patched
            LOG.warning(f"The following error appeared during disabling "
                        f"the finalizer for K8sNode {k8s_node.name}: {err}")

        waiters.wait(lambda: not self.k8sclient.nodes.present(k8s_node.name),
                     interval=30, timeout=300)

        # 2. PRODX-40367
        lock_name = f"openstack-{k8s_node.name}"
        if not self.k8sclient.nodeworkloadlocks.present_all(lock_name):
            return

        LOG.warning(f"Workaround PRODX-40367 for K8sNode {k8s_node.name}: "
                    f"remove openstack nwl '{lock_name}'")
        self.k8sclient.nodeworkloadlocks.get(lock_name).delete()
        waiters.wait(lambda: not self.k8sclient.nodeworkloadlocks.present_all(lock_name),
                     interval=10, timeout=300)

    def wait_machine_enable(self, machine):
        """
        Wait until the Machine is no longer reported as disabled.

        Checks, in order: evacuation status 'disable' is gone (up to 7200 seconds),
        Machine status 'Disabled' is gone, and the NodeDisableNotification is deleted.
        """
        LOG.banner("Wait until NodeDisableNotifications are removed for the 'enabled' Machine", sep='-')
        self.cluster.check.wait_evacuation_status(
            machine.name, "disable", timeout=7200, evacuation_expected=False)
        machine.wait_for_status_absence("Disabled")

        LOG.info("Verifying that nodedisablenotification is deleted")
        k8s_node_name = machine.get_k8s_node_name()
        leftover = self.cluster.get_nodedisablenotification(k8s_node_name)
        assert leftover is None, \
            f"NodeDisableNotification {k8s_node_name} is not deleted but it is expected to be deleted"

    def check_machine_disabled(self, machine):
        """
        Verify, without waiting, that the Machine is in the 'disabled' state.

        Checks, in order: LCMMachine evacuation status 'disable', 'haltAgent' flag in the
        LCMMachine spec, Machine provider status 'Disabled', absence of the node in the MKE
        nodes list, and presence of the NodeDisableNotification.
        """
        lcm_object = self.cluster.get_cluster_lcmmachine(name=machine.name, namespace=machine.namespace)

        evacuation = lcm_object.data.get('status', {}).get('evacuation', '')
        assert evacuation == 'disable', \
            f"LCMMachine {lcm_object.name} has evacuation status {evacuation}, expected 'disable'"

        agent_halted = lcm_object.data.get('spec', {}).get('haltAgent', False)
        assert agent_halted, f"agent of LCMMachine {lcm_object.name} is not halted"

        # 'status' may be present with an empty value, so fall back to an empty mapping
        provider_status = (machine.data.get('status') or {}).get('providerStatus', {})
        readiness = provider_status.get('status')
        assert readiness == 'Disabled', \
            f"Machine {machine.name} has status {readiness}, expected 'Disabled'"

        LOG.info("Verifying that docker node is absent")
        k8s_node_name = machine.get_k8s_node_name()
        mke_node_list = self.cluster.mkedashboardclient.get_nodes()
        if any(entry['Description']['Hostname'] == k8s_node_name for entry in mke_node_list):
            raise Exception(f"Node {k8s_node_name} is present in MKE nodes list but it should not be present "
                            f"because LCMMachine has evacuation disable status")

        LOG.info("Verifying that nodedisablenotification is created")
        notification = self.cluster.get_nodedisablenotification(k8s_node_name)
        assert notification is not None, \
            f"NodeDisableNotification {k8s_node_name} is not created but it is expected to be created"

    def make_broken_lcm_for_day2_operations(self, machines, state_names):
        """Make some LCM state broken in the LCMMachine objects

        For every Machine from 'machines', the LCMMachine with the same name and namespace is
        patched so that 'spec.stateItemsOverwrites' contains {'ansible': '/bin/false'} for each
        state name from 'state_names'. Existing overwrites for other states are sent back unchanged.
        """
        for machine in machines:
            lcmmachine = self.cluster.get_cluster_lcmmachine(name=machine.name, namespace=machine.namespace)
            lcmmachine_spec = lcmmachine.data.get('spec') or {}
            # An LCMMachine without any overwrites may have no 'stateItemsOverwrites' key at all
            # (or a null value), so start from an empty mapping in that case
            state_items_overwr = lcmmachine_spec.get('stateItemsOverwrites') or {}
            for state_name in state_names:
                state_items_overwr[state_name] = {'ansible': '/bin/false'}
            body = {
                'spec': {
                    'stateItemsOverwrites': state_items_overwr
                }
            }
            LOG.info(f"Break ansible in LCMMachine {lcmmachine.namespace}/{lcmmachine.name} for tasks {state_names}")
            lcmmachine.patch(body)

    def fix_broken_lcm_for_day2_operations(self, machines):
        """Fix broken LCM states

        For every Machine from 'machines', the 'ansible' overwrite is set to None in each entry
        of 'spec.stateItemsOverwrites' of the matching LCMMachine, and the result is patched back.
        LCMMachines without any overwrites are left untouched.
        """
        for machine in machines:
            lcmmachine = self.cluster.get_cluster_lcmmachine(name=machine.name, namespace=machine.namespace)
            lcmmachine_spec = lcmmachine.data.get('spec') or {}
            # The key may be missing or null when nothing was ever overwritten
            state_items_overwr = lcmmachine_spec.get('stateItemsOverwrites') or {}
            if not state_items_overwr:
                LOG.info(f"No state items overwrites in LCMMachine {lcmmachine.namespace}/{lcmmachine.name}, "
                         f"nothing to fix")
                continue

            for overwrite in state_items_overwr.values():
                # Only mapping entries can carry an 'ansible' overwrite; skip anything else
                if isinstance(overwrite, dict):
                    # NOTE(review): None presumably removes the key when the body is applied
                    # as a merge patch - confirm against the 'patch' implementation
                    overwrite['ansible'] = None

            body = {
                'spec': {
                    'stateItemsOverwrites': state_items_overwr
                }
            }
            LOG.info(f"Fix 'ansible' in LCMMachine {lcmmachine.namespace}/{lcmmachine.name}")
            lcmmachine.patch(body)

    def make_broken_reboot_for_day2_operations(self, machine):
        """Prevent the Machine from rebooting normally

        Masks the container runtime systemd units, then removes the executable flag from
        the lcm-agent binary found in /usr/local/bin/ and kills the running agent.

        SSH private key is required for this method
        """
        LOG.info(f"Break the normal reboot for Machine {machine.namespace}/{machine.name}")

        masked_units = (
            'containerd.service',
            'docker.service',
            'docker.socket',
            'cri-dockerd-mke.service',
        )
        for unit in masked_units:
            machine.run_cmd(f'sudo systemctl mask {unit}')

        # Single shell pipeline: every step runs only if the previous one succeeded
        break_agent_steps = [
            'LCM_AGENT_BINARY=$(find /usr/local/bin/ -name "lcm-agent*")',
            'test -n "$LCM_AGENT_BINARY"',
            'echo "Remove executable flag from $LCM_AGENT_BINARY"',
            'sudo chmod 0644 $LCM_AGENT_BINARY',
            'sudo pkill -f $LCM_AGENT_BINARY',
        ]
        machine.run_cmd(' && '.join(break_agent_steps))

    def fix_broken_reboot_for_day2_operations(self, machines, start_services=True):
        """Restore the normal reboot for Machines

        Unmasks the container runtime systemd units and restores the executable flag on
        the lcm-agent binary found in /usr/local/bin/. When 'start_services' is true,
        the units are also started.

        SSH private key is required for this method
        """
        unmasked_units = (
            'containerd.service',
            'docker.service',
            'docker.socket',
            'cri-dockerd-mke.service',
        )
        # Start order differs from the unmask order: the socket goes before docker.service
        started_units = (
            'containerd.service',
            'docker.socket',
            'docker.service',
            'cri-dockerd-mke.service',
        )
        # Single shell pipeline: every step runs only if the previous one succeeded
        restore_agent_cmd = ' && '.join([
            'LCM_AGENT_BINARY=$(find /usr/local/bin/ -name "lcm-agent*")',
            'test -n "$LCM_AGENT_BINARY"',
            'echo "Set executable flag for $LCM_AGENT_BINARY"',
            'sudo chmod 0755 $LCM_AGENT_BINARY',
        ])

        for machine in machines:
            LOG.info(f"Fix the normal reboot for Machine {machine.namespace}/{machine.name}")
            # Machine BMH should be 'online: true' at that moment, and Machine should be available via SSH
            for unit in unmasked_units:
                machine.run_cmd(f'sudo systemctl unmask {unit}')
            machine.run_cmd(restore_agent_cmd)
            if not start_services:
                continue
            for unit in started_units:
                machine.run_cmd(f'sudo systemctl start {unit}')

    def _get_ceph_mon_deployments_info(self):
        """Collect info about ceph-mon Deployments from the 'rook-ceph' namespace.

        Returns a dict with two keys:
        - 'ready_num': number of ceph-mon Deployments with at least one ready replica;
        - 'ceph_daemon_ids': mapping of the 'kubernetes.io/hostname' node selector value
          to the 'ceph_daemon_id' label of the Deployment.

        Deployments which raise ApiException with status 404 while being read are skipped.
        """
        ready_count = 0
        daemon_id_by_node = {}
        for item in self.k8sclient.deployments.list(namespace='rook-ceph'):
            try:
                item_data = item.data
                item_labels = item_data.get('metadata', {}).get('labels', {}) or {}
                if item_labels.get('ceph_daemon_type') != 'mon':
                    continue

                pod_spec = item_data.get('spec', {}).get('template', {}).get('spec', {}) or {}
                selector = pod_spec.get('node_selector', {}) or {}
                hostname = selector.get('kubernetes.io/hostname')
                daemon_id = item_labels.get('ceph_daemon_id')
                if hostname and daemon_id:
                    daemon_id_by_node[hostname] = daemon_id

                replicas = item.ready_replicas or 0
                if replicas > 0:
                    ready_count += 1
                LOG.info(f"Deployment '{item.name}' on K8sNode '{hostname}' ready replicas: {replicas}")
            except ApiException as err:
                # A Deployment may be removed between listing and reading it
                if err.status != 404:
                    raise
                LOG.info(f"Raised exception during checking deployments, skipping: {err}")
        return {
            'ready_num': ready_count,
            'ceph_daemon_ids': daemon_id_by_node,
        }

    def move_ceph_monitor_during_maintenance(self, disabled_k8s_node_name, new_storage_machines, ceph_k8snode_labels,
                                             interval=30, timeout=2400):
        """Perform steps from documentation to move Ceph monitor from 'disabled' Machine to another Machine

        disabled_k8s_node_name: name of the K8sNode whose ceph-mon replica and rook-ceph
            Deployments are removed.
        new_storage_machines: Machines that receive the Ceph labels on their K8sNodes.
        ceph_k8snode_labels: mapping of Machine name to the K8sNode labels to add; it must
            contain a key for every Machine from 'new_storage_machines'.
        interval, timeout: polling parameters used only while waiting for a new ceph-mon
            Deployment (Step 7); all other waits use fixed values.

        NOTE(review): if any step fails after Step 2, 'ceph-maintenance-controller' and
        'rook-ceph-operator' are left scaled down - nothing here restores them on error.
        """

        # Steps from the documentation page:
        # - https://gerrit.mcp.mirantis.com/c/kaas/kaas-docs/+/194610/2/doc/source/operations-guide/operate-managed/operate-managed-bm/manage-ceph/move-mon-daemon.rst  # noqa

        # ceph-maintenance-controller must not be disabled for some time after patching MiraCeph
        # until at least two ceph-mon replicas will be available.
        # Step 1: ensure suitable cluster condition
        LOG.banner("Wait at least 2 ceph-mon 'ready' deployments before moving ceph-mon replicas "
                   "from disabled Machines", sep='-')
        waiters.wait(lambda: self._get_ceph_mon_deployments_info()['ready_num'] >= 2, interval=30, timeout=3600)

        ceph_maintenance_controller = self.k8sclient.deployments.get(
            name='ceph-maintenance-controller', namespace='ceph-lcm-mirantis')
        rook_ceph_operator = self.k8sclient.deployments.get(
            name='rook-ceph-operator', namespace='rook-ceph')

        # Remember the original replicas number to restore it in Step 8
        ceph_maintenance_controller_replicas = ceph_maintenance_controller.desired_replicas

        # Step 2: disable both the maintenance controller and the operator (scale to 0 replicas);
        #         the operator is enabled again in Step 6, the maintenance controller in Step 8
        LOG.banner("Disable 'ceph-maintenance-controller' and 'rook-ceph-operator'", sep='-')
        ceph_maintenance_controller.set_replicas(0)
        ceph_maintenance_controller.wait_ready()
        rook_ceph_operator.set_replicas(0)
        rook_ceph_operator.wait_ready()

        # Step 3: Ceph operator relies on the info stored in Ceph and tries to re-create outdated replicas,
        #         so we need to remove these outdated replicas from Ceph
        LOG.banner("Remove disabled ceph-mon replicas from Ceph itself", sep='-')
        ceph_mon_deployments_info = self._get_ceph_mon_deployments_info()
        LOG.info("Get rook-ceph-tools pod")
        # NOTE(review): raises IndexError when no 'rook-ceph-tools' pod is found
        ceph_tools_pod = self.k8sclient.pods.list(namespace='rook-ceph', name_prefix='rook-ceph-tools')[0]
        ceph_tools_pod.wait_ready()

        LOG.info("Cleanup mons from ceph")
        # The daemon id is known only if a ceph-mon Deployment is still pinned to the disabled node
        ceph_daemon_id = ceph_mon_deployments_info['ceph_daemon_ids'].get(disabled_k8s_node_name)
        if ceph_daemon_id:
            LOG.warning(f"Remove Ceph mon '{ceph_daemon_id}'")
            cmd = ['/bin/sh', '-c', f'ceph mon rm {ceph_daemon_id}']
            ceph_tools_pod.exec(cmd)
        else:
            LOG.warning(f"Ceph mon is absent for disabled Node {disabled_k8s_node_name}")

        # The dump is only logged, for troubleshooting
        cmd = ['/bin/sh', '-c', 'ceph mon dump']
        ceph_dump = ceph_tools_pod.exec(cmd)
        LOG.info(f"Current ceph mons:\n{ceph_dump}")

        # Step 4: remove ceph-mon Deployment related to the outdated replicas
        # (every rook-ceph Deployment pinned to the disabled node is deleted, not only ceph-mon)
        LOG.banner("Delete rook-ceph deployments placed on the disabled Machines", sep='-')
        rook_ceph_deployments = self.k8sclient.deployments.list(namespace='rook-ceph')
        for rook_ceph_deployment in rook_ceph_deployments:
            template_spec = rook_ceph_deployment.data.get('spec', {}).get('template', {}).get('spec', {}) or {}
            node_selector = template_spec.get('node_selector', {}) or {}
            node_name = node_selector.get('kubernetes.io/hostname')
            if node_name == disabled_k8s_node_name:
                LOG.info(f"Delete deployment '{rook_ceph_deployment.namespace}/{rook_ceph_deployment.name}' "
                         f"that was placed on the K8sNode {node_name}")
                rook_ceph_deployment.delete()

        # TODO(ddmitriev): just double-check that maintenance controller is disabled before setting Node labels
        # For now the pods are only logged, followed by a fixed 120 seconds pause
        ceph_maintenance_controller_pods = self.k8sclient.pods.list(
            name_prefix='ceph-maintenance-controller', namespace='ceph-lcm-mirantis')
        LOG.info(f"ceph-maintenance-controller pods: {ceph_maintenance_controller_pods}\nSleep 120")
        time.sleep(120)

        # Step 5: Add labels on K8sNodes, because ceph operator will try to find suitable nodes
        #         for new ceph-mon replicas using these labels
        LOG.banner("Add ceph labels on K8sNodes for the new ceph nodes", sep='-')
        for new_storage_machine in new_storage_machines:
            if ceph_k8snode_labels[new_storage_machine.name]:
                # Add ceph labels on k8s Node
                new_storage_machine.add_k8s_node_labels(ceph_k8snode_labels[new_storage_machine.name])

        # Step 6: Enable ceph operator
        LOG.banner("Enable 'rook-ceph-operator'", sep='-')
        rook_ceph_operator.set_replicas(1)
        rook_ceph_operator.wait_ready()

        # Step 7: Wait until ceph operator create new Deployment for ceph-mon on new K8sNodes
        LOG.banner("Wait until at least one new ceph-mon Deployment is created for the new storage Machines", sep='-')
        new_k8s_node_names = [m.get_k8s_node_name() for m in new_storage_machines]

        def _check_new_deployments():
            # True as soon as any of the new K8sNodes has a ceph-mon Deployment pinned to it
            ceph_mon_deployments_info = self._get_ceph_mon_deployments_info()
            for node_name in new_k8s_node_names:
                ceph_daemon_id = ceph_mon_deployments_info['ceph_daemon_ids'].get(node_name)
                if ceph_daemon_id:
                    LOG.info(f"Found new ceph-mon Deployment for replica '{ceph_daemon_id}' "
                             f"on K8sNode {node_name}")
                    return True
            return False
        waiters.wait(_check_new_deployments, interval=interval, timeout=timeout)

        LOG.banner("Wait until all rook-ceph pods are running", sep='-')
        self.cluster.check.check_k8s_pods(timeout=1200, interval=30, target_namespaces=["rook-ceph"])

        # Step 8: Enable maintenance controller to continue LCM operations with Ceph
        LOG.banner("Enable 'ceph-maintenance-controller'", sep='-')
        ceph_maintenance_controller.set_replicas(ceph_maintenance_controller_replicas)
        ceph_maintenance_controller.wait_ready()

        LOG.banner("DONE: Ceph roles were successfully moved on new Machines", sep='-')

    def wait_nwl_created_for_new_machines(self, new_storage_machines, interval=30, timeout=3600):
        """Wait until a NodeWorkloadLock 'ceph-<K8sNode name>' exists for every given Machine.

        The check is repeated every 'interval' seconds for up to 'timeout' seconds.
        """

        def _check_nwl():
            for storage_machine in new_storage_machines:
                lock_name = f"ceph-{storage_machine.get_k8s_node_name()}"
                if self.k8sclient.nodeworkloadlocks.present_all(lock_name):
                    LOG.info(f"Found NodeWorkloadLock '{lock_name}'")
                    continue
                # The first missing lock ends this attempt
                LOG.warning(f"NodeWorkloadLock '{lock_name}' still not found")
                return False
            LOG.info(f"*** All expected NodeWorkloadLocks present "
                     f"in the Cluster {self.cluster.namespace}/{self.cluster.name}")
            return True

        waiters.wait(_check_nwl, interval=interval, timeout=timeout)

    def disable_ceph_for_machine(self, disabled_machine_name, disabled_k8s_node_name,
                                 storage_candidate_machines=None):
        """Move Ceph roles from a disabled Machine to a storage candidate Machine

        Nothing is changed when Ceph is not deployed in the Cluster or when the disabled
        Machine is not listed among the Ceph nodes.

        Depending on 'cluster.workaround.skip_kaascephcluster_usage()', the Ceph nodes are read
        from and patched in either MiraCeph (a list of nodes, matched by K8sNode name) or
        KaasCephCluster (a mapping keyed by Machine name).

        disabled_machine_name: name of the disabled Machine.
        disabled_k8s_node_name: name of the K8sNode of the disabled Machine; passed explicitly
            because the node might be already removed at this time.
        storage_candidate_machines: optional list of Machines which may receive the Ceph roles.
            The last item is popped from this list and used as the target. When the list is
            empty or None, candidates are taken from get_storage_candidate_machines().

        Raises Exception when there is no candidate Machine to move the Ceph roles to.
        """
        if not self.cluster.is_ceph_deployed:
            LOG.warning(f"Ceph is not deployed for Cluster '{self.cluster.namespace}/{self.cluster.name}', "
                        f"skip moving ceph monitor to another Machine")
            return

        # Steps from the documentation page:
        # - https://gerrit.mcp.mirantis.com/c/kaas/kaas-docs/+/192191/17/doc/source/common/ops/ceph/ceph-node-disable.rst  # noqa

        LOG.banner("Move Ceph roles from the disabled to a working Machine")

        # Evaluate the switch once, so that all steps below work with the same Ceph object type
        use_miraceph = self.cluster.workaround.skip_kaascephcluster_usage()

        if use_miraceph:
            # Get ceph nodes from MiraCeph
            miraceph = self.cluster.get_miracephcluster()
            ceph_nodes = miraceph.data.get('spec', {}).get('nodes', [])
            # Machine names (not K8sNode names) are collected, because the exclusion list
            # is compared with Machine names in get_storage_candidate_machines()
            old_ceph_nodes_names = []
            for ceph_node in ceph_nodes:
                if ceph_node['name'] in (disabled_k8s_node_name, disabled_machine_name):
                    # Use the provided names because the disabled node might be already removed at this time
                    old_ceph_nodes_names.append(disabled_machine_name)
                    continue
                ceph_machine = self.cluster.get_machine_by_k8s_name(ceph_node['name'])
                # NOTE(review): assumes the lookup returns a Machine object or None - confirm
                if ceph_machine is not None:
                    old_ceph_nodes_names.append(ceph_machine.name)
        else:
            # Get ceph nodes (MCC Machine names) from KaasCephCluster
            kaascephcluster = self.cluster.get_cephcluster()
            ceph_nodes = kaascephcluster.data.get('spec', {}).get('cephClusterSpec', {}).get('nodes', {})
            old_ceph_nodes_names = list(ceph_nodes.keys())

        storage_labels = utils.get_labels_by_type('storage')
        if not storage_candidate_machines:
            storage_candidate_machines = self.get_storage_candidate_machines(old_ceph_nodes_names)
        if not storage_candidate_machines:
            raise Exception(f"There are no available Machines to move Ceph roles "
                            f"from disabled machine {disabled_machine_name}")
        # The selected Machine is removed from the provided list, so the caller can see
        # which candidates are still unused
        storage_candidate_machine = storage_candidate_machines.pop()

        LOG.info(f"disabled_machine_name = {disabled_machine_name}")
        # Only the remaining (not selected) candidates are listed here
        for m in storage_candidate_machines:
            LOG.info(f"storage_candidate_machine = {m.name}")

        new_storage_machines = []
        ceph_k8snode_labels = {}
        move_ceph_mon_role = False
        cluster_machines = self.cluster.get_machines()

        if use_miraceph:
            disabled_machine_in_ceph = any(
                ceph_node['name'] == disabled_k8s_node_name for ceph_node in ceph_nodes)
        else:
            disabled_machine_in_ceph = disabled_machine_name in ceph_nodes

        if not disabled_machine_in_ceph:
            LOG.info("Disabled Machine is not part of KaasCephCluster, nothing to fix for Ceph")
            return

        # 1. Find a new Machine without 'storage' label to move Ceph services
        new_storage_machines.append(storage_candidate_machine)
        LOG.info(f"Ceph will be evacuated from Machine '{disabled_machine_name}' "
                 f"to Machine '{storage_candidate_machine.name}'")

        # 2. Move 'storage' label from disabled Machine to the new Machine
        LOG.info(f"Add 'storage' label to Machine '{storage_candidate_machine.name}'")
        storage_candidate_machine.add_machine_labels(storage_labels)

        disabled_machines = [m for m in cluster_machines
                             if m.name == disabled_machine_name and m.is_disabled()]
        if disabled_machines:
            disabled_machine = disabled_machines[0]
            LOG.info(f"Remove 'storage' label from Machine '{disabled_machine.name}' : {storage_labels.keys()}")
            disabled_machine.remove_machine_labels(list(storage_labels.keys()))

        # 3. Move roles to the new Machine
        LOG.info(f"Move KaasCephCluster roles from Machine '{disabled_machine_name}' "
                 f"to Machine '{storage_candidate_machine.name}'")
        candidate_labels = {}
        ceph_k8snode_labels[storage_candidate_machine.name] = candidate_labels

        if use_miraceph:
            candidate_k8s_node_name = storage_candidate_machine.get_k8s_node_name()

            # Roles of the disabled node are the roles to move
            ceph_node_roles = []
            for ceph_disabling_node in ceph_nodes:
                if ceph_disabling_node['name'] == disabled_k8s_node_name:
                    ceph_node_roles = ceph_disabling_node.get('roles') or []
                    break
            for role in ceph_node_roles:
                candidate_labels[f"ceph_role_{role}"] = "true"

            # Reuse the entry of the candidate node if MiraCeph already has one,
            # otherwise add a new named entry
            for existing_node in ceph_nodes:
                if existing_node['name'] == candidate_k8s_node_name:
                    existing_node['roles'] = ceph_node_roles
                    break
            else:
                ceph_nodes.append({'name': candidate_k8s_node_name, 'roles': ceph_node_roles})

            move_ceph_mon_role = 'mon' in ceph_node_roles
        else:
            moved_roles = ceph_nodes[disabled_machine_name]['roles']
            ceph_nodes[storage_candidate_machine.name] = {'roles': moved_roles}
            move_ceph_mon_role = 'mon' in moved_roles
            for role in moved_roles:
                candidate_labels[f"ceph_role_{role}"] = "true"

        if candidate_labels:
            # Add ceph labels on k8s Node
            storage_candidate_machine.add_k8s_node_labels(candidate_labels)

        # 4. Remove old Machine from ceph nodes
        if use_miraceph:
            LOG.info(f"Remove Machine '{disabled_machine_name}' from MiraCeph "
                     f"'{miraceph.namespace}/{miraceph.name}'")
            for ceph_node in ceph_nodes:
                if ceph_node['name'] == disabled_k8s_node_name:
                    # Safe while iterating: the loop is left right after the removal
                    ceph_nodes.remove(ceph_node)
                    break
        else:
            LOG.info(f"Remove Machine '{disabled_machine_name}' from kaascephcluster "
                     f"'{kaascephcluster.namespace}/{kaascephcluster.name}'")
            # NOTE(review): None presumably removes the key when applied as a merge patch - confirm
            ceph_nodes[disabled_machine_name] = None

        # 5. Update KaasCephCluster with new Machine/node names and remove disabled Machines
        if use_miraceph:
            body = {
                'spec': {
                    'nodes': ceph_nodes,
                }
            }
            miraceph = self.cluster.get_miracephcluster()
            LOG.info(f"Make changes to MiraCeph '{miraceph.namespace}/{miraceph.name}'")
            miraceph.patch(body)

            # 6. Wait for updated MiraCeph
            LOG.banner("Wait for MiraCeph readiness", sep='-')
            self.cluster.check.wait_miraceph_phase()
        else:
            body = {
                'spec': {
                    'cephClusterSpec': {
                        'nodes': ceph_nodes,
                    }
                }
            }
            kaascephcluster = self.cluster.get_cephcluster()
            LOG.info(f"Make changes to KaasCephCluster '{kaascephcluster.namespace}/{kaascephcluster.name}'")
            kaascephcluster.patch(body)

            # 6. Wait for updated MiraCeph
            LOG.banner("Wait for new nodes in MiraCeph", sep='-')
            self.cluster.check.wait_miraceph_nodes_updated()

        # 7. Wait for new NWLs
        LOG.banner("Wait that Ceph NodeWorkloadLocks are created for nodes added to Ceph", sep='-')
        self.wait_nwl_created_for_new_machines(new_storage_machines)

        # 8. If 'mon' role was moved
        if move_ceph_mon_role:
            LOG.banner("Move Ceph monitor", sep='-')
            self.move_ceph_monitor_during_maintenance(disabled_k8s_node_name,
                                                      new_storage_machines, ceph_k8snode_labels)

    def disable_stacklight_for_machine(self, disabled_machine_name, disabled_k8s_node_name):
        """Move StackLight from a disabled Machine to the working Machines

        Steps:
        1. Add nodeLabel {key: stacklight, value: enabled} to every Machine returned by
           get_stacklight_candidate_machines()
        2. Remove that nodeLabel from the Machine named 'disabled_machine_name',
           if the Machine is disabled and still has the label
        3. Proceed with PVC as described in https://gerrit.mcp.mirantis.com/c/kaas/kaas-docs/+/194333 :
           remove PVCs and pods from the 'stacklight' namespace which are placed on the disabled k8s node

        :param disabled_machine_name: str, name of the disabled Machine object
        :param disabled_k8s_node_name: str, name of the k8s Node of the disabled Machine
        """
        LOG.banner("Move StackLight from disabled to working Machines")

        # Select the disabled Machine(s) by name; is_disabled() is asked only for the name matches
        machines_to_unlabel = []
        for candidate in self.cluster.get_machines():
            if candidate.name != disabled_machine_name:
                continue
            if candidate.is_disabled():
                machines_to_unlabel.append(candidate)

        stacklight_label = [{"key": "stacklight", "value": "enabled"}]

        # SL requires at least 3 *Ready* Machines to add labels,
        # which is impossible if one of SL Machines is Disabled and another in maintenance.
        # To proceed with this limitation, let's add labels to all available machines
        for target in self.get_stacklight_candidate_machines():
            LOG.info(f"Add nodeLabel stacklight=enabled to Machine '{target.namespace}/{target.name}'")
            target.add_labels(stacklight_label)

        for labeled in machines_to_unlabel:
            if not labeled.has_nodelabels(stacklight_label):
                continue
            LOG.info(f"Remove nodeLabel stacklight=enabled from Machine "
                     f"'{labeled.namespace}/{labeled.name}'")
            labeled.remove_labels(stacklight_label)

        self.cleanup_pods_and_pvc_from_k8s_node(disabled_k8s_node_name, target_namespaces=['stacklight'])

    def cleanup_ovn_db_inactive_members(self, target_node_k8s_name=None):
        """Delete old (inactive) members from OVN Southbound/Northbound db clusters

        WA PRODX-50258: When OVN enabled, then we should manually delete old ovn db member
        1. Wait for 'openvswitch-ovn-db' health status 'Ready' in the OpenStackDeploymentStatus
        2. Get all SB/NB members and compare the count with the amount of Machines
           that have nodeLabel 'openstack-control-plane=enabled'
        3. For each db which has an extra member: exec into the container of the duplicated member,
           find the record without the (self) label and kick it from the cluster.
           A db which already has the expected members count is left untouched.
        4. Print OVN db status
        5. Check octavia health manager port and re-enable when it is DOWN. PRODX-50218

        Nothing is done if OpenStack is not deployed or the neutron backend is not 'ml2/ovn'.

        :param target_node_k8s_name: str, k8s Node name which is used as a suffix of the
                                     octavia health manager port name; step 5 is skipped if empty
        :raises Exception: if SB or NB db reports fewer members than expected
        :raises AssertionError: if ovn db pods or keystone-client pods are not found, or if
                                a db with an extra member has not exactly one duplicated member name
        """
        if not self.cluster.is_os_deployed():
            LOG.info("There is no MOSK cluster")
            return
        osdpl = self.k8sclient.openstackdeployment.get(osdpl_name, namespace="openstack")
        osdpls = self.k8sclient.openstackdeploymentstatus.get(osdpl_name, namespace="openstack")
        if osdpl.data.get('spec').get('features', {}).get('neutron', {}).get('backend', None) != 'ml2/ovn':
            LOG.info("Skipping the ovn db members cleanup due to ovn is not enabled")
            return

        def _wait_ready():
            ovn_db_status = osdpls.data.get('status').get(
                'health', {}).get('openvswitch_ovn', {}).get('openvswitch-ovn-db', {}).get('status', None)
            LOG.info(f'OVN db status: {ovn_db_status}')
            assert ovn_db_status == 'Ready'
        waiters.wait_pass(_wait_ready, interval=30, expected=AssertionError, timeout=180)

        machines_with_ovn = [
            x for x in self.cluster.get_machines() if 'nodeLabels' in x.spec['providerSpec']['value'] and
            {'key': 'openstack-control-plane', 'value': 'enabled'} in x.nodeLabels
        ]
        # One db member is expected per each openstack control plane Machine
        expected_members = len(machines_with_ovn)

        ovn_db_pods = self.k8sclient.pods.list_starts_with("openvswitch-ovn-db-")
        assert len(ovn_db_pods) > 0, "OVN enabled, but ovn db pod not found!"
        ovn_db_pod = ovn_db_pods[0]

        # Each member is collected as '<id>:<field 4>:<field 5>' of the matching 'cluster/status' line
        ovn_db_sb_members = ovn_db_pod.run_exec(
            ['/bin/sh', '-c', 'ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/status OVN_Southbound'
                              '|grep "at tcp:openvswitch-ovn"|awk \'{print $1":"$4":"$5}\''],
            container='sb', raise_on_error=True
        )["stdout"].split()
        ovn_db_nb_members = ovn_db_pod.run_exec(
            ['/bin/sh', '-c', 'ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/status OVN_Northbound'
                              '|grep "at tcp:openvswitch-ovn"|awk \'{print $1":"$4":"$5}\''],
            container='nb', raise_on_error=True
        )["stdout"].split()
        sb_count = len(ovn_db_sb_members)
        nb_count = len(ovn_db_nb_members)
        LOG.info(f"Count of members - SB: {sb_count}, NB: {nb_count}")

        if sb_count == expected_members and nb_count == expected_members:
            LOG.banner("Duplicate of OVN db members not found")
            return
        if sb_count < expected_members or nb_count < expected_members:
            raise Exception(
                f"Failed to parse members, incorrect count of members. "
                f"Should be {expected_members} but Sb: {sb_count} Nb: {nb_count}")

        LOG.banner('OVN db has wrong member count. Lets fix it')

        # SB and NB are handled independently: only a db with an extra member has a duplicate to kick.
        # Otherwise a re-run after a partial cleanup would fail on the already fixed db.
        if sb_count > expected_members:
            all_sb_members_names = [m.split(':')[2] for m in ovn_db_sb_members]
            duplicated_sb_members = [k for k, v in Counter(all_sb_members_names).items() if v == 2]
            assert len(duplicated_sb_members) == 1, \
                f"Expected only one member name with two copy, but got {len(duplicated_sb_members)}"
            # get member container name for filtering by (self) label
            sb_name = duplicated_sb_members[0].split('.')[0]
            sb_member_to_delete = self.k8sclient.pods.get(sb_name, namespace='openstack').exec(
                ['/bin/sh', '-c', 'ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/status OVN_Southbound'
                                  '|grep "at tcp:openvswitch-ovn"|awk \'{print $1":"$4":"$5}\''
                                  f"|grep {sb_name}|grep -v self"],
                container='sb').split()[0].split(":")[0]
            LOG.info(f'Delete inactive OVN db sb member with id {sb_member_to_delete}')
            ovn_db_pod.exec(
                ['/bin/sh', '-c', 'ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/kick '
                                  f'OVN_Southbound {sb_member_to_delete}'], container='sb')
        else:
            LOG.info("OVN db sb has the expected members count, nothing to delete")

        if nb_count > expected_members:
            all_nb_members_names = [m.split(':')[2] for m in ovn_db_nb_members]
            duplicated_nb_members = [k for k, v in Counter(all_nb_members_names).items() if v == 2]
            assert len(duplicated_nb_members) == 1, \
                f"Expected only one member name with 2 copy, but got {len(duplicated_nb_members)}"
            # get member container name for filtering by (self) label
            nb_name = duplicated_nb_members[0].split('.')[0]
            nb_member_to_delete = self.k8sclient.pods.get(nb_name, namespace='openstack').exec(
                ['/bin/sh', '-c', 'ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/status OVN_Northbound'
                                  '|grep "at tcp:openvswitch-ovn"|awk \'{print $1":"$4":"$5}\''
                                  f"|grep {nb_name}|grep -v self"],
                container='nb').split()[0].split(":")[0]
            LOG.info(f'Delete inactive OVN db nb member with id {nb_member_to_delete}')
            ovn_db_pod.exec(
                ['/bin/sh', '-c', 'ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/kick '
                                  f'OVN_Northbound {nb_member_to_delete}'], container='nb')
        else:
            LOG.info("OVN db nb has the expected members count, nothing to delete")

        LOG.info("Print OVN db status after the cleanup old members")
        ovn_db_status_south = ovn_db_pod.run_exec(
            ['/bin/sh', '-c', 'ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/status OVN_Southbound'],
            container='sb'
        )["stdout"]
        ovn_db_status_north = ovn_db_pod.run_exec(
            ['/bin/sh', '-c', 'ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/status OVN_Northbound'],
            container='nb'
        )["stdout"]
        LOG.info(ovn_db_status_south)
        LOG.info(ovn_db_status_north)

        if target_node_k8s_name:
            client_pod = self.cluster.k8sclient.pods.list(namespace="openstack", name_prefix='keystone-client')
            assert len(client_pod) > 0, "No pods found with prefix keystone-client in namespace openstack"
            client_pod = client_pod[0]
            port_cmd = [
                '/bin/sh', '-c', f"PYTHONWARNINGS=ignore::UserWarning openstack port show "
                                 f"octavia-health-manager-"
                                 f"listen-port-{target_node_k8s_name} -f value -c status"]
            octavia_port = client_pod.exec(port_cmd).strip()
            if octavia_port == "DOWN":
                # Toggle the port admin state (disable, then enable) to bring it up again
                LOG.info("Octavia port is DOWN, re enable it")
                client_pod.exec(
                    ['/bin/sh', '-c', f"PYTHONWARNINGS=ignore::UserWarning "
                                      f"openstack port set --disable octavia-health-manager-listen-port-"
                                      f"{target_node_k8s_name}"])
                client_pod.exec(
                    ['/bin/sh', '-c', f"PYTHONWARNINGS=ignore::UserWarning "
                                      f"openstack port set --enable octavia-health-manager-listen-port-"
                                      f"{target_node_k8s_name}"])

    def recreate_octavia_resources_and_check(self, os_manager, disabled_node_k8s_name, new_node_k8s_name):
        """Re-create octavia resources for the new node and delete the port of the disabled node

        1. Re-run the 'octavia-create-resources' job from the openstack-controller pod
        2. Wait until an 'octavia-health-manager' pod is scheduled on the new node and becomes Ready.
           If the pod is not Ready in time, delete it once and wait for the re-created pod
        3. Verify that OpenStack port 'octavia-health-manager-listen-port-<new node>' is ACTIVE
        4. Delete OpenStack port 'octavia-health-manager-listen-port-<disabled node>'

        :param os_manager: object with attribute 'oc_name', the name prefix of openstack-controller pods
        :param disabled_node_k8s_name: str, k8s Node name of the disabled Machine
        :param new_node_k8s_name: str, k8s Node name of the new Machine
        :raises AssertionError: if the required pods are not found or the new port is not ACTIVE
        """

        controller_pods = self.cluster.k8sclient.pods.list(namespace="osh-system", name_prefix=os_manager.oc_name)
        controller_pods = [cp for cp in controller_pods if 'admission' not in cp.name]
        assert len(controller_pods) > 0, "Openstack-controller pod not found"
        os_manager_client_pod = controller_pods[0]

        # Get OS keystone client pod
        keystone_pods = self.cluster.k8sclient.pods.list(namespace="openstack", name_prefix='keystone-client')
        assert len(keystone_pods) > 0, "No pods found with prefix keystone-client in namespace openstack"
        client_pod = keystone_pods[0]

        LOG.info("Rerun the octavia-create-resources job")
        cmd_octavia = ['/bin/sh', '-c', 'osctl-job-rerun octavia-create-resources openstack']
        result_cmd_octavia = os_manager_client_pod.exec(cmd_octavia, container='osdpl')
        LOG.info(f"Octavia rerun job result: {result_cmd_octavia}")

        def _hm_pods_on_new_node(skip_uid=None):
            """List octavia-health-manager pods placed on the new node, except the pod with 'skip_uid'"""
            matched = []
            for hm_pod in self.cluster.k8sclient.pods.list_starts_with("octavia-health-manager"):
                try:
                    hm_pod_data = hm_pod.data
                except ApiException as e:
                    # NOTE(review): assumes '.data' may read the pod from the API; a pod that is
                    # gone between list and read is just skipped
                    if e.status != 404:
                        raise
                    continue
                if hm_pod_data['spec']['node_name'] != new_node_k8s_name:
                    continue
                if skip_uid is not None and hm_pod_data['metadata']['uid'] == skip_uid:
                    continue
                matched.append(hm_pod)
            return matched

        def _wait_hm_pod_on_new_node(timeout_msg, skip_uid=None):
            """Wait for an octavia-health-manager pod on the new node and return it"""
            found = []

            def _poll():
                # Keep the result of the successful poll, to not list the pods again after the wait
                found[:] = _hm_pods_on_new_node(skip_uid=skip_uid)
                return bool(found)
            waiters.wait(_poll, timeout=600, interval=10, timeout_msg=timeout_msg)
            return found[0]

        LOG.info("Wait for octavia-health-manager pod started on new node")
        octavia_hm_pod = _wait_hm_pod_on_new_node("Octavia pod is not created on the new node")
        LOG.info("octavia-health-manager was created")

        LOG.info("Wait for octavia-health-manager pod in Ready state")
        try:
            octavia_hm_pod.wait_ready(timeout=600, interval=10)
        except exceptions.TimeoutError:
            LOG.error("Octavia pod is not ready. Try to delete and wait again status (documentation step)")
            stale_pod_uid = octavia_hm_pod.data['metadata']['uid']
            octavia_hm_pod.delete()
            # The deleted pod can stay in the pods list while it is terminating, and its replacement
            # may not exist yet. Wait for a pod with another uid instead of a fixed delay.
            octavia_hm_pod = _wait_hm_pod_on_new_node(
                "Octavia pod is not re-created on the new node after deletion", skip_uid=stale_pod_uid)
            octavia_hm_pod.wait_ready(timeout=450, interval=10)

        LOG.info("Verify that an OpenStack port for the node has been created and Active")
        port_cmd = [
            '/bin/sh', '-c', f"PYTHONWARNINGS=ignore::UserWarning openstack port show "
                             f"octavia-health-manager-"
                             f"listen-port-{new_node_k8s_name} -f value -c status"]
        octavia_active_port = client_pod.exec(port_cmd).strip()
        assert octavia_active_port == "ACTIVE", "Octavia listen port not found, or not in status Active"

        LOG.info("Delete octavia port for disabled node")
        client_pod.exec(['/bin/sh', '-c',
                         f"PYTHONWARNINGS=ignore::UserWarning "
                         f"openstack port delete "
                         f"octavia-health-manager-listen-port-{disabled_node_k8s_name}"])

    def cleanup_pods_and_pvc_from_k8s_node(self, disabled_k8s_node_name, target_namespaces=None):
        """Find pods which are placed on the disabled node and remove these pods and their PVCs

        For each pod scheduled on 'disabled_k8s_node_name', every PVC referenced from the pod volumes
        is deleted first, then the pod itself is deleted.
        404 errors from the API are logged and skipped, any other API error is re-raised.

        :param disabled_k8s_node_name: str, name of the k8s Node to cleanup
        :param target_namespaces: list of str, process only pods from these namespaces;
                                  pods from all namespaces are processed if empty or None
        :raises ApiException: on any API error except 404
        """

        LOG.banner("Cleanup PODs and PVCs that are scheduled on the disabled Machines")
        target_namespaces = target_namespaces or []
        pods = self.k8sclient.pods.list_all()
        LOG.info(f"Pods and PVCs cleanup started for K8sNode {disabled_k8s_node_name}")
        for pod in pods:
            if target_namespaces and pod.namespace not in target_namespaces:
                continue
            try:
                pod_data = pod.data
                pod_node = pod_data['spec'].get('node_name')
                if not pod_node or pod_node != disabled_k8s_node_name:
                    continue
                # 'volumes' is optional in the pod spec: treat a missing/None value as "no volumes"
                for volume in pod_data['spec'].get('volumes') or []:
                    pvc_source = volume.get('persistent_volume_claim')
                    if not pvc_source:
                        continue
                    pvc_name = pvc_source['claim_name']
                    LOG.info(f"Delete PVC {pvc_name}")
                    pvc = self.k8sclient.pvolumeclaims.get(name=pvc_name, namespace=pod.namespace)
                    pvc.delete()
                LOG.info(f"Delete Pod '{pod.namespace}/{pod.name}' from node '{pod_node}'")
                pod.delete()
            except ApiException as e:
                if e.status != 404:
                    raise
                # The object is already gone, nothing to cleanup
                LOG.info(f"Skipping error: {e}")

        LOG.info("Pods cleanup completed")

    def check_cluster_readiness(self, exp_provider_status, expected_condition_fails=None, **kwargs):
        """Check Cluster conditions readiness in case if some Machines are in 'Disabled' state

        If 'exp_provider_status' is False, the following conditions are added to the expected fails:
        'Nodes' (with the message built from requested/ready/disabled counters), 'Kubernetes',
        'MachineDisable' and 'LCMOperation'. The dict passed by the caller is not modified,
        a copy extended with these conditions is used instead.

        :param exp_provider_status: bool, if False, then ignore some Cluster conditions
                                          if True, then run cluster.check.check_cluster_readiness() without excludes
        :param expected_condition_fails: dict, additional conditions to exclude
        :param kwargs: passed to cluster.check.check_cluster_readiness() as is
        """
        if not exp_provider_status:
            # Work on a copy, to not leak the conditions added below into the caller's dict
            expected_condition_fails = dict(expected_condition_fails or {})
            node_status = self.cluster.data.get('status', {}).get('providerStatus', {}).get('nodes', {})
            expected_nodes = node_status.get('requested', 0)
            # Count Machines per providerStatus.status; statuses missing in the Counter give 0
            machine_statuses = Counter(
                m.data.get('status', {}).get('providerStatus', {}).get('status', '')
                for m in self.cluster.get_machines())
            expected_ready_machines = machine_statuses['Ready']
            expected_disabled_machines = machine_statuses['Disabled']

            node_condition_msg = (f"Requested {expected_nodes} node(s), ready {expected_ready_machines}, "
                                  f"disabled {expected_disabled_machines}.")
            expected_condition_fails['Nodes'] = node_condition_msg
            expected_condition_fails['Kubernetes'] = "not ready: statefulSets: tf/redis-tf-redis got 2/3 replicas"
            expected_condition_fails['MachineDisable'] = ''
            expected_condition_fails['LCMOperation'] = ''
            if self.cluster.workaround.prodx_42484():
                # 'LCMOperation' is already excluded above, only report that the workaround is active
                LOG.warning("Workaround for LCMOperation condition is enabled")

        self.cluster.check.check_cluster_readiness(expected_condition_fails=expected_condition_fails,
                                                   exp_provider_status=exp_provider_status,
                                                   **kwargs)

    def _copy_and_modify_machine_and_bmh(self, machine,
                                         old_machine_data=None,
                                         old_bmh_data=None,
                                         old_bmhc_data=None,
                                         old_secret_data=None,
                                         day1_provisioning='auto',
                                         day1_deployment='auto'):
        """Collect data from existing objects and make new objects from this data

        old_machine_data, old_bmh_data, old_bmhc_data and old_secret_data can be used
        to make new objects from the provided data instead of taking existing objects.
        The provided dicts are never modified: all changes are made on deep copies.

        :param machine: Machine object to make a copy of
        :param old_machine_data: dict, Machine data to use instead of 'machine.data'
        :param old_bmh_data: dict, BMH data to use instead of the data of the existing BMH
        :param old_bmhc_data: dict, BMHCredential data to use instead of the data of the existing one
        :param old_secret_data: dict, Secret data to use instead of the data of the existing Secret
        :param day1_provisioning:
        'auto' allows automatic progression through the provisioning workflow.
        'manual' or empty string requires explicit approval before proceeding with provisioning.
        empty string is the same as 'manual'
        The value is set to the new Machine only if 'machine-pauses' feature flag is enabled and
        the cluster release is >= 2.30.0; it is forced to 'auto' if
        MACHINE_PAUSE_DURING_CREATION_ENABLED is not set
        :param day1_deployment:
        'auto' allows automatic progression through the deployment workflow.
        'manual' or empty string requires explicit approval before proceeding with deployment.
        empty string is the same as 'manual'
        The value is handled in the same way as 'day1_provisioning'
        :return: tuple (new_machine_data, new_bmh_data, bm_hosts_data, new_bmhc_data, new_secret_data,
                        old_bmh_name, new_machine_name, new_bmh_name, new_bmhc_name, new_secret_name)
        :raises Exception: if BMH has no baremetal host <ID> label

        - Dynamic data is removed from the objects
        - New names are generated using "--new-<....>" suffix
        - BMH Inventory object is always created from BMH object data to simplify the method,
          because BMH always exists, but BMH Inventory can be missing for source data.
        """

        def clean_mutable_data(_obj):
            """Remove dynamic data from the dict object"""
            _obj.pop('status', None)
            _obj['metadata'].pop('creation_timestamp', None)
            _obj['metadata'].pop('resource_version', None)
            _obj['metadata'].pop('uid', None)
            _obj['metadata'].pop('managed_fields', None)
            _obj['metadata'].pop('generation', None)

        def replace_name_suffix(_name, _suffix):
            """Replace the suffix following after '--', to keep constant length after few replaces"""
            return _name.split('--')[0] + f"--new-{_suffix}"

        ns = self.cluster._manager.get_namespace(self.cluster.namespace)

        LOG.banner("Collect data from the existing Machine/BMH/BMHI/BMHC/Secret objects", sep='-')

        # Collect Machine object.
        # Always deepcopy the source (also the data passed by the caller), because it is modified below
        new_machine_data = copy.deepcopy(old_machine_data or machine.data)

        # Collect BMH object
        old_bmh_name = machine.metadata['annotations'].get('metal3.io/BareMetalHost').split("/")[1]
        old_bmh = ns.get_baremetalhost(name=old_bmh_name)
        old_bmh_data = old_bmh_data or old_bmh.data
        new_bmh_data = copy.deepcopy(old_bmh_data)
        new_bmh_data['spec']['online'] = True
        # bm_hosts_data is required for ns.wait_baremetalhost_deletion() to wait also BMHC deletion
        bm_hosts_data = [{
            'name': old_bmh_name.split('-')[0],
            'bmh_annotations': old_bmh_data['metadata'].get('annotations', {}) or {},
        }]

        # Collect BMHC object data
        old_bmhc_name = new_bmh_data['metadata'].get('annotations', {}).get(
            'kaas.mirantis.com/baremetalhost-credentials-name')
        old_bmhc = ns.get_baremetalhostcredential(name=old_bmhc_name)
        new_bmhc_data = copy.deepcopy(old_bmhc_data or old_bmhc.data)

        # Collect Secret object
        old_secret = ns.get_secret(name=new_bmh_data['spec']['bmc']['credentialsName'])
        new_secret_data = copy.deepcopy(old_secret_data or old_secret.data)

        # Modify old objects data to create new objects
        suffix = utils.gen_random_string(4)
        new_bmhc_name = replace_name_suffix(old_bmhc_name, suffix)
        new_secret_name = replace_name_suffix(new_secret_data['metadata']['name'], suffix)
        new_bmh_name = replace_name_suffix(new_bmh_data['metadata']['name'], suffix)
        new_machine_name = replace_name_suffix(new_machine_data['metadata']['name'], suffix)

        # Modify BMHCredential and Secret objects
        clean_mutable_data(new_secret_data)
        new_secret_data['metadata'].pop('owner_references', None)
        new_secret_data['metadata']['labels']['kaas.mirantis.com/credentials'] = new_bmhc_name
        new_secret_data['metadata']['name'] = new_secret_name

        clean_mutable_data(new_bmhc_data)
        new_bmhc_data['metadata'].pop('owner_references', None)
        new_bmhc_data['metadata']['annotations'].pop('kaas.mirantis.com/bmh-names', None)
        new_bmhc_data['metadata']['name'] = new_bmhc_name
        new_bmhc_data['spec']['password']['secret']['name'] = new_secret_name

        # Modify BMH object
        clean_mutable_data(new_bmh_data)
        new_bmh_data['spec'].pop('consumerRef', None)
        new_bmh_data['spec'].pop('userData', None)
        new_bmh_data['spec'].pop('image', None)
        new_bmh_data['spec']['bmc']['credentialsName'] = new_secret_name
        new_bmh_data['metadata']['name'] = new_bmh_name
        new_bmh_data['metadata']['annotations']['kaas.mirantis.com/baremetalhost-credentials-name'] = new_bmhc_name
        new_bmh_labels = new_bmh_data['metadata']['labels']
        if 'baremetal' in new_bmh_labels:
            # BMH <id> label created for BMHInventory in MCC >= 2.29.0
            new_bmh_id = replace_name_suffix(new_bmh_labels.pop('baremetal'), suffix)
        elif 'kaas.mirantis.com/baremetalhost-id' in new_bmh_labels:
            # BMH <id> label created for BMH
            new_bmh_id = replace_name_suffix(new_bmh_labels.pop('kaas.mirantis.com/baremetalhost-id'), suffix)
        else:
            raise Exception(f"No baremetal host <ID> label found for BMH '{old_bmh.namespace}/{old_bmh.name}'")
        # Use the baremetal host <id> label name depending on the BMH/BMHI method creation
        if settings.USE_BMH_INVENTORY:
            new_bmh_labels['baremetal'] = new_bmh_id
        else:
            new_bmh_labels['kaas.mirantis.com/baremetalhost-id'] = new_bmh_id

        # Modify Machine object
        clean_mutable_data(new_machine_data)
        new_machine_data['metadata']['name'] = new_machine_name
        new_machine_data['metadata']['annotations'].pop('kaas.mirantis.com/uid', None)    # remove k8s Node uid
        new_machine_data['metadata']['annotations'].pop('metal3.io/BareMetalHost', None)  # remove mapping on old BMH
        new_machine_data['spec']['providerSpec']['value']['disable'] = None
        if settings.FEATURE_FLAGS.enabled('machine-pauses'):
            cr_version = self.cluster.get_desired_clusterrelease_version()
            if utils.clusterrelease_version_greater_than_or_equal_to_kaas_2_30_0(cr_version):
                if not settings.MACHINE_PAUSE_DURING_CREATION_ENABLED:
                    # Pauses are disabled by settings: ignore the requested day1 modes
                    day1_provisioning = 'auto'
                    day1_deployment = 'auto'
                new_machine_data['spec']['providerSpec']['value']['day1Provisioning'] = day1_provisioning
                new_machine_data['spec']['providerSpec']['value']['day1Deployment'] = day1_deployment

        new_machine_hostselector = new_machine_data['spec']['providerSpec']['value']['hostSelector']
        # Use the baremetal host <id> label name depending on the BMH/BMHI method creation
        if settings.USE_BMH_INVENTORY:
            new_machine_hostselector['matchLabels'] = {'baremetal': new_bmh_id}  # replace matchLabels
        else:
            new_machine_hostselector['matchLabels'] = {'kaas.mirantis.com/baremetalhost-id': new_bmh_id}

        LOG.debug(f"New Machine data:\n{yaml.dump(new_machine_data)}")
        LOG.debug(f"New BMH(BMHI) data:\n{yaml.dump(new_bmh_data)}")
        LOG.debug(f"New BMC data:\n{yaml.dump(new_bmhc_data)}")
        LOG.debug(f"New Secret data:\n{yaml.dump(new_secret_data)}")

        return (new_machine_data, new_bmh_data, bm_hosts_data, new_bmhc_data, new_secret_data,
                old_bmh.name, new_machine_name, new_bmh_name, new_bmhc_name, new_secret_name)

    def replace_baremetal_machine_and_bmh(self, machine, machine_deletion_policy="graceful",
                                          machine_deletion_timeout=3600,
                                          old_machine_data=None, old_bmh_data=None,
                                          old_bmhc_data=None, old_secret_data=None):
        """Re-create the Machine and its BMH related objects on the same baremetal node

        Workflow:
        1. Prepare the data for new Machine/BMH(BMHInventory)/BMHCredentials/Secret objects
           from the existing objects (or from the provided old_*_data), with changed names/labels
        2. For a non-management cluster: create the new Machine (mapped to a not yet created BMH)
           before the old one is deleted, and wait for at least 2 Ready workers.
           That Machine should satisfy the requirements about nodeLabels amount in the cluster.
        3. Delete the old Machine using the specified deletion policy and wait until
           its k8s Node is deleted
        4. Delete the old BMHInventory (or BMH for MCC < 2.29.0) and wait for its deletion.
           BMHCredentials and the related Secret must be deleted by controller as well.
        5. For a management cluster: create the new Machine only now, after the old one is deleted
        6. Create the new Secret, BMHCredential and BMH (or BMHInventory, depending on
           USE_BMH_INVENTORY flag) objects
        7. Wait until BMH is provisioned; if creation pauses are enabled, release the pauses
           before provisioning and before deployment
        8. Wait until Machine is deployed (or disabled, as in the original spec)

        :param machine: Machine object to replace
        :param machine_deletion_policy: str, deletion policy for the old Machine
        :param machine_deletion_timeout: int, timeout for the old Machine deletion
        :param old_machine_data: dict, passed to _copy_and_modify_machine_and_bmh()
        :param old_bmh_data: dict, passed to _copy_and_modify_machine_and_bmh()
        :param old_bmhc_data: dict, passed to _copy_and_modify_machine_and_bmh()
        :param old_secret_data: dict, passed to _copy_and_modify_machine_and_bmh()
        :return: the new Machine object
        """
        cluster = self.cluster

        # Pauses during Machine creation are used only if the feature flag, the cluster release
        # version and the settings allow that
        pause_on_creation = False
        if settings.FEATURE_FLAGS.enabled('machine-pauses'):
            release_version = cluster.get_desired_clusterrelease_version()
            if utils.clusterrelease_version_greater_than_or_equal_to_kaas_2_30_0(release_version):
                pause_on_creation = bool(settings.MACHINE_PAUSE_DURING_CREATION_ENABLED)
        day1_mode = 'manual' if pause_on_creation else 'auto'

        # Make a copy of the Machine and BMH related objects, with a new suffix
        (machine_body, bmh_body, bm_hosts_data, bmhc_body, secret_body,
         old_bmh_name, new_machine_name, new_bmh_name, new_bmhc_name, new_secret_name
         ) = self._copy_and_modify_machine_and_bmh(
            machine, old_machine_data, old_bmh_data, old_bmhc_data, old_secret_data,
            day1_provisioning=day1_mode, day1_deployment=day1_mode)

        def _create_new_machine():
            """Create the new Machine object from the prepared data"""
            LOG.banner(f"Create new Machine '{cluster.namespace}/{new_machine_name}'", sep='-')
            return cluster.create_baremetal_machine_raw(data=machine_body)

        def _enough_ready_workers():
            """True if the cluster has at least 2 worker Machines in Ready status"""
            return len(cluster.get_machines(machine_type='worker', machine_status='Ready')) >= 2

        LOG.banner(f"Replace baremetal Machine '{machine.namespace}/{machine.name}'")

        namespace_obj = cluster._manager.get_namespace(cluster.namespace)
        old_node_name = machine.get_k8s_node_name()

        if not cluster.is_management:
            # "Scale" child cluster with a new machines with the same roles, before deleting old machine
            created_machine = _create_new_machine()
            LOG.banner(f"Check if there at least 2 ready workers before machine delete procedure: "
                       f"'{cluster.namespace}/{machine.name}'")
            waiters.wait(_enough_ready_workers, interval=5, timeout=600)
            LOG.banner(f"Ready workers: {cluster.get_machines(machine_type='worker', machine_status='Ready')}")

        lcm_machines = cluster.get_lcmmachines()
        LOG.banner(f"LCM machines before delete: {lcm_machines}")

        LOG.banner(f"Delete ({machine_deletion_policy}) old Machine '{cluster.namespace}/{machine.name}'",
                   sep='-')
        check_machine_deletion_policy(cluster, machine,
                                      deletion_policy=machine_deletion_policy,
                                      wait_deletion_timeout=machine_deletion_timeout,
                                      check_deleted_node=False)
        cluster.check.wait_deleted_node(old_node_name)

        LOG.banner(f"Delete old BMH/BMHInventory '{cluster.namespace}/{old_bmh_name}'", sep='-')
        # Deletes BMH or BMHInventory depending on which object is present
        namespace_obj.delete_baremetalhost(name=old_bmh_name)
        namespace_obj.wait_baremetalhost_deletion(
            old_bmh_name, wait_bmh_cred=True, bm_hosts_data=bm_hosts_data, timeout=3600)

        if cluster.is_management:
            # Create new machine for management cluster only after deleting old machine
            created_machine = _create_new_machine()

        LOG.banner(f"Create new Secret '{cluster.namespace}/{new_secret_name}'", sep='-')
        cluster._manager.api.secrets.create(
            namespace=cluster.namespace, name=new_secret_name, body=secret_body)

        LOG.banner(f"Create new BMHCredential '{cluster.namespace}/{new_bmhc_name}'", sep='-')
        namespace_obj.create_baremetalhostcredential_raw(data=bmhc_body)

        LOG.banner(f"Create new BMH '{cluster.namespace}/{new_bmh_name}'", sep='-')
        namespace_obj._create_baremetalhost(body=bmh_body)
        replaced_machine = cluster.get_machine_uncached(name=new_machine_name)

        if pause_on_creation:
            # The Machine is paused before provisioning: check that and release the pause
            LOG.info(f"Waiting for machine {new_machine_name} to be paused before provisioning")
            cluster.check.wait_machine_status_by_name(machine_name=new_machine_name,
                                                      expected_status='AwaitsProvisioning',
                                                      timeout=5400)
            LOG.info("Checking baremetal hosts: bmh must have 'available' status")
            namespace_obj.wait_baremetalhosts_statuses(nodes=new_bmh_name,
                                                       wait_status='available',
                                                       retries=40,
                                                       interval=60)
            LOG.info(f"Removing pause before provisioning from machine {new_machine_name}")
            cluster.set_day1_provisioning('auto', [replaced_machine])
            replaced_machine = cluster.get_machine_uncached(name=new_machine_name)
            cluster.check.check_day1_modes([replaced_machine], provisioning='auto')

        namespace_obj.wait_baremetalhosts_statuses(nodes=new_bmh_name,
                                                   wait_status='ready',
                                                   retries=40,
                                                   interval=60)

        if pause_on_creation:
            # The Machine is paused before deployment: check that and release the pause
            LOG.info(f"Waiting for machine {new_machine_name} to be paused before deployment")
            cluster.check.wait_machine_status_by_name(machine_name=new_machine_name,
                                                      expected_status='AwaitsDeployment',
                                                      timeout=5400)
            LOG.info(f"Removing pause before deployment from machine {new_machine_name}")
            cluster.set_day1_deployment('auto', [replaced_machine])
            replaced_machine = cluster.get_machine_uncached(name=new_machine_name)
            cluster.check.check_day1_modes([replaced_machine], deployment='auto')

        if replaced_machine.is_disabled():
            # A disabled Machine (as in the original spec) is not expected to become Ready
            return replaced_machine

        LOG.banner(f"Wait new machine '{cluster.namespace}/{created_machine.name}' readiness", sep='-')
        # Waiting for new Machine becomes Ready
        cluster.check.wait_machine_status_by_name(machine_name=created_machine.name,
                                                  expected_status='Ready',
                                                  timeout=5400)
        return replaced_machine

    @staticmethod
    def get_reboot_required_status(m_list, namespace_name):
        """Check whether all requested Machines show a reboot warning

        Machine objects are fetched through the kubectl CLI wrapper and the
        'warnings' list from 'status.providerStatus' of each requested Machine
        is inspected. A Machine is considered as requiring reboot when its
        warnings contain "Scheduled for a reboot" or "Reboot is in progress".

        Machines without 'status', 'providerStatus' or 'warnings' are treated
        as having no reboot warning.

        NOTE: names from m_list which are not found in the kubectl output are not
        taken into account, so the result is True when none of them is found
        (all() of an empty collection).

        :param m_list: collection of Machine names to check
        :param namespace_name: namespace argument passed to 'kubectl get machines'
        :return: True if every found Machine from m_list has a reboot warning
        """
        reboot_warnings = ("Scheduled for a reboot", "Reboot is in progress")
        kubectl = kubectl_utils.Kubectl()
        nodes_warning = {}
        out = kubectl.get('machines', '-o yaml', namespace_name).result_yaml
        for item in out['items']:
            name = item['metadata']['name']
            # Filter by name before reading the status, so Machines that were not requested
            # (and may have no status yet) cannot break the check.
            # A Machine already recorded with a reboot warning is never overwritten.
            if name not in m_list or nodes_warning.get(name):
                continue
            # 'status', 'providerStatus' and 'warnings' may be absent or null
            provider_status = (item.get('status') or {}).get('providerStatus') or {}
            warnings = provider_status.get('warnings') or []
            nodes_warning[name] = any(message in warnings for message in reboot_warnings)
        LOG.info("machines without reboot warning:\n" + str([k for k, v in nodes_warning.items() if not v]))
        return all(nodes_warning.values())
