import random
import re
import time

import exec_helpers
import pytest
import yaml
from kubernetes.client.rest import ApiException

from si_tests import logger, settings
from si_tests.managers.machine_deletion_policy_manager import check_machine_unsafe_delete
from si_tests.utils import utils, waiters, templates

LOG = logger.logger


def compare_keyrings_in_pod_and_cluster_and_fix_if_needed(managed_cluster, osd_id_num, target_node, ssh):
    """Compare the OSD keyring known to Ceph with the one on the node; re-import it on mismatch.

    The Ceph-side keyring is read with ``ceph auth get osd.<id>`` executed in the
    first ``rook-ceph-tools`` pod. The node-side keyring is looked up among the
    ``keyring`` files found under ``/var/lib/rook/rook-ceph`` over ``ssh``: the
    file whose first line equals the first line of the Ceph output is used.
    When the second lines (the key lines) differ, the node's key line is written
    into the Ceph keyring, imported with ``ceph auth import`` and the
    ``rook-ceph-osd-<id>`` deployment is scaled to 0 and back to 1
    (workaround from PRODX-44228).

    Args:
        managed_cluster: cluster object; its ``k8sclient`` is used for pods and deployments.
        osd_id_num: ID of the OSD, i.e. the part after ``osd.``.
        target_node: machine object; only ``name`` is read, for messages.
        ssh: SSH client for the node holding the OSD; ``sudo_mode`` is switched on.

    Raises:
        AssertionError: if the tools pod, the keyring output, the keyring files or
            the matching keyring are missing or malformed, or if creating/importing
            the new keyring wrote anything to stderr.
    """
    ssh.sudo_mode = True
    LOG.info("Check that cluster is affected by keyring issue")
    LOG.info("Obtain the keyring of a Ceph OSD stored in the Ceph cluster")
    ceph_tools_pods = managed_cluster.k8sclient.pods.list(namespace='rook-ceph', name_prefix='rook-ceph-tools')
    assert ceph_tools_pods, "No rook-ceph-tools pod found in namespace rook-ceph"
    ceph_tools_pod = ceph_tools_pods[0]
    ceph_tools_pod.wait_ready()
    cmd = ['/bin/sh', '-c', f'ceph auth get osd.{osd_id_num}']
    keyring_data_in_pod = ceph_tools_pod.exec(cmd).splitlines()
    # Line 0 is compared as the OSD section header, line 1 as the key line;
    # fail with context instead of a bare IndexError when the output is shorter.
    assert len(keyring_data_in_pod) >= 2, (
        f"Unexpected output of 'ceph auth get osd.{osd_id_num}': expected at least a header "
        f"and a key line, got {len(keyring_data_in_pod)} line(s)")
    osd_in_pod = keyring_data_in_pod[0]
    keyring_in_pod = keyring_data_in_pod[1].strip()
    LOG.info(f"Keyring in pod: {keyring_in_pod}")

    LOG.info("Obtain the keyring value of the host path for the failed Ceph OSD")
    cmd = ['/bin/sh', '-c', 'find /var/lib/rook/rook-ceph -name "keyring"']
    pathes_to_keyring_files = [path_to_keyring_file.strip().decode("utf-8") for path_to_keyring_file
                               in ssh.execute(cmd, verbose=True, timeout=60).stdout]
    assert pathes_to_keyring_files, f"No keyring files on node: {target_node.name}"
    keyring_on_node = ""
    for path in pathes_to_keyring_files:
        cmd = ['/bin/sh', '-c', f'cat {path}']
        keyring_data_on_node = [data.strip().decode("utf-8") for data
                                in ssh.execute(cmd, verbose=True, timeout=60).stdout]
        assert keyring_data_on_node, f"No keyring data in keyring file by path: {path}"
        if keyring_data_on_node[0] != osd_in_pod:
            # Keyring file of some other entity, keep looking
            continue
        LOG.info(f"Correct osd.{osd_id_num} was founded on node, continue")
        assert len(keyring_data_on_node) >= 2, f"Keyring file {path} has a header but no key line"
        keyring_on_node = keyring_data_on_node[1]
        break
    assert keyring_on_node, f"Keyring for OSD: {osd_id_num} wasn't founded on target node: {target_node.name}"

    if keyring_in_pod == keyring_on_node:
        LOG.info(f"Cluster wasn't affected we go further without any actions, keyring is same: {keyring_in_pod}")
        return

    LOG.info(f"Keyrings are different, test needs to apply workaround from PRODX-44228"
             f"\nAffected OSD: {osd_in_pod} \nAffected node: {target_node.name}"
             f"\nKeyring in pod: {keyring_in_pod}\nKeyring on node: {keyring_on_node}")
    LOG.info("Change keyring pod with key from node")
    # Keep the rest of the Ceph-side keyring and substitute only the key line
    keyring_data_in_pod[1] = "\t" + keyring_on_node
    keyring_data_in_pod = "\n".join(keyring_data_in_pod)
    LOG.info("Create file with new config in pod")
    cmd = ['/bin/sh', '-c', f"echo '{keyring_data_in_pod}' > /tmp/key"]
    create_file_error_result = ceph_tools_pod.exec(cmd, stdout=False, stderr=True)
    assert not create_file_error_result, f"Command: {cmd} failed with error: {create_file_error_result}"
    LOG.info("Import new keyring")
    cmd = ['/bin/sh', '-c', "ceph auth import -i /tmp/key"]
    import_key_error_result = ceph_tools_pod.exec(cmd, stdout=False, stderr=True)
    assert not import_key_error_result, f"Command: {cmd} failed with error: {import_key_error_result}"
    LOG.info("Restart rook-ceph-osd pod")
    rook_ceph_deployment = managed_cluster.k8sclient.deployments.get(
        name=f"rook-ceph-osd-{osd_id_num}",
        namespace="rook-ceph")
    ceph_osd_replicas = rook_ceph_deployment.data['spec']['replicas']
    LOG.info(f"ceph_osd_replicas: {ceph_osd_replicas}")
    rook_ceph_deployment.patch(
        body={"spec": {"replicas": 0}})
    LOG.info('Wait for rook-ceph-osd scale down')
    rook_ceph_deployment.wait_ready(timeout=600, interval=10)
    # NOTE(review): the deployment is scaled back to a fixed 1 replica, the value
    # recorded in ceph_osd_replicas above is only logged - confirm this is intended.
    rook_ceph_deployment.patch(
        body={"spec": {"replicas": 1}})
    LOG.info('Wait for rook-ceph-osd scale up')
    rook_ceph_deployment.wait_ready(timeout=600, interval=10)
    LOG.info("WA from prodx-44228 is successfully applied")


@pytest.mark.usefixtures("introspect_distribution_not_changed")
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_method_time')
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_prepare_ceph_cluster(kaas_manager):
    """
    This test checks rookConfig overrides is working and prepares Ceph cluster for faster rebalance
    https://docs.mirantis.com/container-cloud/latest/operations-guide/manage-ceph/ceph-default-config-opts.html

    The overrides are patched into the MiraCeph or KaaSCephCluster object
    (depending on the cluster workaround flag), then ``ceph config dump`` is
    polled until every override is reported with the expected value.
    """
    managed_ns = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    managed_cluster = managed_ns.get_cluster(settings.TARGET_CLUSTER)
    try:
        health_info = managed_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        managed_cluster.check.wait_ceph_health_status(timeout=2400)
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    # Keys are "<section>|<option>"; a key without "|" is looked up in the "global" section
    rookConfig = {
        'rookConfig': {
            'osd|osd_mclock_profile': "high_recovery_ops",
            'osd|osd_mclock_override_recovery_settings': "true",
            'osd|osd_max_backfills': "64",
            'osd|osd_recovery_max_active': "16",
            'osd|osd_recovery_max_active_hdd': "16",
        }
    }

    if managed_cluster.workaround.skip_kaascephcluster_usage():
        rook_ceph_config = {
            'spec': rookConfig,
        }
        ceph_crd = managed_cluster.get_miracephcluster()
        LOG.info("Applying rookConfig for miraceph")
        ceph_crd.patch(rook_ceph_config)
    else:
        rook_ceph_config = {
            'spec': {
                'cephClusterSpec': rookConfig,
            }
        }
        ceph_crd = managed_cluster.get_cephcluster()
        LOG.info("Applying rookConfig for kaascephcluster")
        ceph_crd.patch(rook_ceph_config)

    def check_config_applied():
        """Return True once every override is in `ceph config dump` with the expected value."""
        ceph_tool_pod = managed_cluster.get_ceph_tool_pod()
        config_dump_cmd = [
            '/bin/sh', '-c', 'ceph config dump -f json']
        try:
            config = yaml.safe_load(ceph_tool_pod.exec(config_dump_cmd, container='rook-ceph-tools'))
        except ApiException:
            LOG.info("Tools pod is being restarted. Will retry")
            return False
        # An empty dump is parsed to None; treat it as "nothing applied yet" and retry
        config = config or []

        not_ready_values = {}
        for k, v in rookConfig['rookConfig'].items():
            key_section, separator, key_name = k.partition('|')
            if not separator:
                # No explicit section in the key: the whole key is the option name
                key_section, key_name = "global", k
            applied_option = next(
                (config_option for config_option in config
                 if config_option["section"] == key_section and config_option["name"] == key_name),
                None)
            if applied_option is not None and applied_option["value"] == v:
                LOG.info(f"Current value for {k} is {v}, as expected")
                continue
            # Either the option is absent from the dump (current stays None)
            # or it is present with a different value
            not_ready_values[k] = {
                'current': None if applied_option is None else applied_option["value"],
                'expected': v,
            }
        if not_ready_values:
            LOG.info(f"Next parameters are not applied yet in ceph config:\n{yaml.dump(not_ready_values)}")
            return False
        return True

    waiters.wait(check_config_applied, interval=120, timeout=900)
    LOG.info("Config applied successfully")
    try:
        health_info = managed_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        managed_cluster.check.wait_ceph_health_status()
    managed_cluster.check.check_k8s_pods()


@pytest.mark.usefixtures("introspect_distribution_not_changed")
@pytest.mark.usefixtures("introspect_ceph_child")
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_method_time')
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_add_bm_ceph_node(kaas_manager):
    """Add one more ceph node.

    Flow:
       1. Create credentials for every node with the 'nodeforscale' si_role from
          the rendered child data and take the first such node.
       2. Reuse or create its BareMetalHost and wait until it is ready.
       3. Create a BareMetal machine on it and wait until it is provisioned.
       4. Add the node to the MiraCeph or KaaSCephCluster object (depending on
          the cluster workaround flag).
       5. Wait until the OSD count grew by the number of configured disks, all
          OSDs are UP and Ceph is HEALTH_OK, then run the cluster checks.
    """

    # cluster is used for access to the mgmt cluster
    cluster = kaas_manager.get_mgmt_cluster()
    # managed_ns and managed_cluster used for child cluster
    managed_ns = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    managed_cluster = managed_ns.get_cluster(settings.TARGET_CLUSTER)
    ceph_health_timeout = 2400
    # Check or set customHostnamesEnabled flag in Cluster object to match settings.CUSTOM_HOSTNAMES
    managed_cluster.set_custom_hostnames_enabled(flag=settings.CUSTOM_HOSTNAMES)

    def wait_ceph_health_ok():
        """Wait for HEALTH_OK unless Ceph already reports it."""
        try:
            health_info = managed_cluster.check.get_ceph_health_detail()
            assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                         f'Current ceph health status: {health_info}'
        except AssertionError:
            managed_cluster.check.wait_ceph_health_status(
                timeout=ceph_health_timeout)

    # Check ceph health before proceed
    wait_ceph_health_ok()
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    cmd_status = ['/bin/sh', '-c', 'ceph -s -f json']
    ceph_tools_pod = managed_cluster.get_ceph_tool_pod()

    def osdmap_counter_is(key, expected):
        """Read `key` from the osdmap section of `ceph -s`, log it and compare with `expected`."""
        current = yaml.safe_load(ceph_tools_pod.exec(cmd_status)).get('osdmap').get(key)
        # Logged on every poll so the last observed value is available on timeout
        LOG.info(f"Current osdmap {key}: {current}, expected: {expected}")
        return current == expected

    # Get ssh key
    public_key_name = f"{settings.TARGET_CLUSTER}-key"
    LOG.info(f"Public key name - {public_key_name}")
    managed_ns.get_publickey(public_key_name)

    mgmt_version = \
        kaas_manager.get_mgmt_cluster().spec['providerSpec']['value']['kaas'][
            'release']
    LOG.info(f"KaaS mgmt version is:{mgmt_version}")
    # Gather nodes information from yaml file
    child_data = utils.render_child_data(kaas_manager.si_config, {'mgmt_version': mgmt_version})

    bm_hosts_data = child_data['nodes']
    managed_ns.create_bm_statics(child_data)

    worker_scale = []
    for host_data in bm_hosts_data:
        if 'nodeforscale' not in host_data.get('si_roles', []):
            continue
        cred_name = host_data['name'] + '-cred'
        # bmh_name and cred_name are stored in the node copy so they do not
        # have to be re-rendered across the whole test.
        scale_node = host_data.copy()
        scale_node.update({
            'bmh_name': utils.render_bmh_name(host_data['name'],
                                              settings.TARGET_CLUSTER,
                                              host_data.get('bootUEFI', True),
                                              host_data['bmh_labels'],
                                              si_roles=host_data.get('si_roles', False)),
            'cred_name': cred_name,
        })
        worker_scale.append(scale_node)
        secret_data = {
            "username": scale_node['ipmi']['username'],
            "password": scale_node['ipmi']['password']
        }
        if "kaas.mirantis.com/baremetalhost-credentials-name" not in scale_node.get('bmh_annotations', {}):
            raise Exception("IPMI credentials supported only over baremetalhostcredentials")
        if scale_node['ipmi'].get('monitoringUsername', False):
            secret_data.update({
                "monitoringPassword": scale_node['ipmi']['monitoringPassword'],
                "monitoringUsername": scale_node['ipmi']['monitoringUsername']
            })
        if not kaas_manager.api.kaas_baremetalhostscredentials.present(name=cred_name,
                                                                       namespace=managed_ns.name):
            region = kaas_manager.get_mgmt_cluster().region_name
            managed_ns.create_baremetalhostcredential(name=cred_name, data=secret_data, region=region,
                                                      provider="baremetal")
        else:
            LOG.warning(f'bmhc: {cred_name} already exist, skipping')

    assert len(worker_scale) >= 1, "BMH with si_roles " \
                                   "'nodeforscale' not found"

    LOG.info(f"worker_scale: {worker_scale}")
    node = worker_scale[0]
    # Take the name that belongs to the chosen node, not whatever the loop rendered last
    bmh_name = node['bmh_name']
    # The Ceph node definition is needed later regardless of the BMH profile;
    # fail before any machine is created if it is missing.
    ceph_config = node.get('ceph_cluster_node')
    assert ceph_config, f"Node {node['name']} has no 'ceph_cluster_node' data, nothing to add to Ceph"

    bmh = managed_ns.get_baremetalhosts()
    # for case when node was added as compute and then was deleted
    # but bmh still present we can reuse bmh without creation
    existing_bmh = [i for i in bmh if "nodeforscale" in i.name]
    if existing_bmh:
        managed_ns.wait_baremetalhosts_statuses(
            nodes=bmh_name,
            wait_status='ready',
            retries=10,
            interval=30)
    else:
        managed_ns.create_baremetalhost(
            bmh_name=bmh_name,
            bmh_secret=node['cred_name'],
            bmh_mac=node['networks'][0]['mac'],
            bmh_ipmi=node['ipmi'],
            hardwareProfile=node.get('hardwareProfile', False),
            labels=node['bmh_labels'],
            annotations=node.get('bmh_annotations', {}),
            bootUEFI=node.get('bootUEFI', True),
            bmhi_credentials_name=node['cred_name'])
        LOG.info(f"New BMH nodes:\n{bmh_name}")

    managed_ns.wait_baremetalhosts_statuses(
        nodes=bmh_name,
        wait_status='ready',
        retries=20,
        interval=60)

    # Create worker machine
    LOG.info('Create BareMetal machine')
    custom_bmhp = False
    release_name = managed_cluster.clusterrelease_version
    distribution = utils.get_distribution_for_node(kaas_manager, node, release_name)
    if node.get('bmh_profile'):
        custom_bmhp = {
            'namespace': settings.TARGET_NAMESPACE,
            'name': node['bmh_profile']
        }
    # The device list lives under a different key in MiraCeph and KaaSCephCluster node specs
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        disk_count = len(ceph_config['devices'])
    else:
        disk_count = len(ceph_config['storageDevices'])
    new_machine = managed_cluster.create_baremetal_machine(
            genname=node['bmh_name'],
            node_pubkey_name=public_key_name,
            matchlabels={'kaas.mirantis.com/'
                         'baremetalhost-id':
                             node['bmh_labels']
                             ['kaas.mirantis.com/baremetalhost-id']},
            baremetalhostprofile=custom_bmhp,
            l2TemplateSelector=node.get('l2TemplateSelector', dict()),
            labels=node['machine_labels'],
            distribution=distribution,
        )

    managed_ns.wait_baremetalhosts_statuses(
        nodes=bmh_name,
        wait_status='provisioned',
        retries=50,
        interval=60)

    managed_cluster.check.check_machines_status()
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        ceph_crd = managed_cluster.get_miracephcluster()
        nodes = ceph_crd.read().spec['nodes']
        created_machine = managed_cluster.get_machine(new_machine.name)
        # MiraCeph nodes are a list, each entry carries the k8s node name
        ceph_config['name'] = created_machine.get_k8s_node_name()
        nodes.append(ceph_config)
        add_ceph_node = {"spec": {"nodes": nodes}}
        LOG.info(f"MiraCeph CRD will be patched with next data:\n{yaml.dump(add_ceph_node)}")
    else:
        ceph_crd = managed_cluster.get_cephcluster()
        # KaaSCephCluster nodes are a mapping keyed by machine name
        add_ceph_node = {
              "spec": {
                "cephClusterSpec": {
                  "nodes": {
                    new_machine.name: ceph_config
                  }
                }
              }
            }
        LOG.info(f"KaasCephcluster CRD will be patched with next data:\n{yaml.dump(add_ceph_node)}")
    # Check if HEALTH_OK before patching
    LOG.info("Check if HEALTH_OK before patching CRD")
    wait_ceph_health_ok()
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    # Get osds num before patch and calculate num after
    num_osds_before_patch = int(yaml.safe_load(ceph_tools_pod.exec(
        cmd_status)).get('osdmap').get('num_osds'))
    num_osds_after_patch = num_osds_before_patch + disk_count
    LOG.info(f"num_osds_before_patch: {num_osds_before_patch}")
    LOG.info(f"num_osds_after_patch: {num_osds_after_patch}")
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Patching miraceph CRD")
        managed_cluster.patch_ceph_data(data=add_ceph_node, crd=ceph_crd)
    else:
        LOG.info("Patching kaascephcluster CRD")
        cluster.patch_ceph_data(data=add_ceph_node, crd=ceph_crd)
    time.sleep(5)
    # Wait until the number of OSDs is increased by the disks count.
    # The timeout message is built before waiting starts, so it must not claim
    # a "current" value; the per-poll log line above carries the observed one.
    LOG.info("Waiting for OSDs number is increased in ceph cluster")
    waiters.wait(
        lambda: osdmap_counter_is('num_osds', num_osds_after_patch),
        timeout=ceph_health_timeout, interval=30,
        timeout_msg=f"OSDs number doesn't match expected number after {ceph_health_timeout} sec. "
                    f"It was {num_osds_before_patch} before the patch and should be {num_osds_after_patch}")

    # Wait for all osds are UP in cluster
    LOG.info("Waiting for all OSDs are UP in ceph cluster")
    waiters.wait(
        lambda: osdmap_counter_is('num_up_osds', num_osds_after_patch),
        timeout=ceph_health_timeout, interval=30,
        timeout_msg=f"Some osds are not UP after {ceph_health_timeout} sec. "
                    f"Number of UP osds should be {num_osds_after_patch}")

    # Wait for HEALTH_OK after osds were added
    LOG.info("Final check ceph cluster status")
    wait_ceph_health_ok()
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        managed_cluster.check.wait_miraceph_phase()
        managed_cluster.check.wait_miracephhealth_state()
    else:
        managed_cluster.check.wait_kaascephcluster_state()
    managed_cluster.check.check_ceph_pvc()
    managed_cluster.check.check_cluster_readiness()
    LOG.info("Ceph cluster is healthy. Congratulations!")

    # Check that Machine hostname is created with respect to Cluster flag 'customHostnamesEnabled'
    managed_cluster.check.check_custom_hostnames_on_machines(
        machines=[managed_cluster.get_machine(name=new_machine.name)])

    # Check correct runtime
    if settings.DESIRED_RUNTIME:
        machines = managed_cluster.get_machines()
        scale_machine_for_runtime_check = [machine for machine in machines if 'nodeforscale' in machine.name][0]
        managed_cluster.check.compare_machines_runtime_with_desired([scale_machine_for_runtime_check],
                                                                    machine_is_new=True)


@pytest.mark.usefixtures("introspect_distribution_not_changed")
@pytest.mark.usefixtures("introspect_ceph_child")
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_method_time')
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_replace_failed_osd_disk(kaas_manager, show_step):
    """This test emulates disk crashing and replacing

    Scenario:
       1. Get information about ceph cluster using ceph tools pod.
       2. Define node and disk for tests
       3. Emulate crash
       4. Check required signals (WARNING,RECENT_CRASH,OSD_DOWN)
       5. Remove osd from kaascephcluster. Create osd remove request
       6. Emulate disk change (Reboot node)
       7. Add device back to kaascephcluster
       8. Wait for new osd spawned
       9. Verify that ceph cluster is healthy after all operations
    """

    cluster = kaas_manager.get_mgmt_cluster()
    managed_ns_name = settings.TARGET_NAMESPACE
    managed_cluster_name = settings.TARGET_CLUSTER
    managed_ns = kaas_manager.get_namespace(managed_ns_name)
    managed_cluster = managed_ns.get_cluster(managed_cluster_name)

    # Determine node for working with disk
    nodes = managed_cluster.get_machines()
    target = [i for i in nodes if "nodeforscale" in i.name]
    assert target, "Scale machine was not found in cluster"
    target_node = target[0]
    failed_disk_node_ip = target_node.public_ip

    # Set SSH connection to machine
    keys = utils.load_keyfile(settings.KAAS_CHILD_CLUSTER_PRIVATE_KEY_FILE)
    pkey = utils.get_rsa_key(keys['private'])
    auth = exec_helpers.SSHAuth(username='mcc-user',
                                password='', key=pkey)
    ssh = exec_helpers.SSHClient(
        host=failed_disk_node_ip, port=22, auth=auth)
    ssh.logger.addHandler(logger.console)
    ssh.sudo_mode = True

    show_step(1)
    ceph_health_timeout = 2400
    # Check ceph health before proceed
    try:
        health_info = managed_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        managed_cluster.check.wait_ceph_health_status(
                 timeout=ceph_health_timeout)
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    cmd_status = ['/bin/sh', '-c', 'ceph -s -f json']
    cmd_osd_tree = ['/bin/sh', '-c', 'ceph osd tree -f json']
    archive_cmd = ['/bin/sh', '-c', 'ceph crash archive-all']
    ceph_tools_pod = managed_cluster.get_ceph_tool_pod()
    show_step(2)
    failed_disk_name = ''
    failed_disk_full_path = ''
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        failed_disk_node_name = target_node.get_k8s_node_name()
        ceph_crd = managed_cluster.get_miracephcluster()
        ceph_crd = ceph_crd.read()
        for node in ceph_crd.spec['nodes']:
            if node['name'] == failed_disk_node_name:
                failed_disk_name = node['devices'][0].get('name', "")
                if not failed_disk_name:
                    # It means that we work with disk by-id and we don't have name
                    failed_disk_full_path = node['devices'][0].get('fullpath', "")
                break
    else:
        failed_disk_node_name = target_node.name
        ceph_crd = managed_cluster.get_cephcluster()
        ceph_crd = ceph_crd.read()
        node = ceph_crd.spec['cephClusterSpec']['nodes'][failed_disk_node_name]
        failed_disk_name = node['storageDevices'][0].get('name', "")
        if not failed_disk_name:
            # It means that we work with disk by-id and we don't have name
            failed_disk_full_path = node['storageDevices'][0].get('fullPath', "")
    if not failed_disk_name:
        if not failed_disk_full_path:
            assert failed_disk_full_path, "Disk fullPath field is EMPTY as Name"
        failed_disk_name = ssh.check_call("udevadm info --query=name {}".format(failed_disk_full_path)).stdout_str
    LOG.info("Working with disk {disk} on node {node}.".format(
        disk=failed_disk_name, node=failed_disk_node_name))
    LOG.info("Verify that all osds are UP")
    osds_info = yaml.safe_load(ceph_tools_pod.exec(cmd_osd_tree))
    osds = [node for node in osds_info.get('nodes') if
            node.get('type') == 'osd']
    nodes = [node for node in osds_info.get('nodes') if
             node.get('type') == 'host']
    osds_dict = {}
    nodes_dict = {}
    for osd in osds:
        osds_dict[osd.get('name')] = {'status': osd.get('status'),
                                      'reweight': osd.get('reweight')}
    for node in nodes:
        nodes_dict[node.get('name')] = {'osds': [
            'osd.' + str(child) for child in node.get('children')]}
    LOG.info("Ceph nodes info:{}{}".format("\n", yaml.dump(nodes_dict)))
    LOG.info("Ceph osds info:{}{}".format("\n", yaml.dump(osds_dict)))

    show_step(3)
    # Here we will simulate disk crash
    LOG.info("Cleaning {disk} disk on node {node} "
             "and remove it from system".format(
                 disk=failed_disk_name, node=failed_disk_node_name))
    LOG.info(f"Going to ssh on {failed_disk_node_ip}")
    ssh.check_call(
        "dd if=/dev/zero of=/dev/{} bs=100M count=10".format(
            failed_disk_name))
    ssh.check_call(
        "echo '1' > /sys/block/{}/device/delete".format(
            failed_disk_name))

    show_step(4)
    # Wait for HEALTH_WARN
    LOG.info("Waiting for WARNING signal after disk was crashed.")
    try:
        health_info = managed_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_WARN", f'Health is not OK. Will not proceed. ' \
                                                       f'Current ceph health status: {health_info}'
    except AssertionError:
        managed_cluster.check.wait_ceph_health_status(
            timeout=ceph_health_timeout, expected_status='HEALTH_WARN')

    # Check that only 1 osd is down and belongs to choosen node
    osds_info = yaml.safe_load(ceph_tools_pod.exec(cmd_osd_tree))
    osds_down = [node.get('name') for node in osds_info.get(
        'nodes') if node.get('type') == 'osd' and node.get(
        'status') == 'down']
    assert len(osds_down) == 1, ("More that 1 osds are down: {}".format(
        osds_down))
    osds = [node for node in osds_info.get('nodes') if
            node.get('type') == 'osd']
    for osd in osds:
        osds_dict[osd.get('name')] = {'status': osd.get('status'),
                                      'reweight': osd.get('reweight')}
    LOG.info("Current osds stats: {}{}".format("\n", yaml.dump(osds_dict)))

    # Now we need to wait until information about crash appears in ceph status
    LOG.info("Waiting for information about "
             "recent crash appears in ceph status")
    waiters.wait(
        lambda: 'RECENT_CRASH' in yaml.safe_load(
            ceph_tools_pod.exec(cmd_status)).get('health', {}).get(
            'checks'), timeout=ceph_health_timeout, interval=30)

    LOG.info("Archiving information about crash")
    ceph_tools_pod.exec(archive_cmd)

    # Wait for rebalancing and HEALTH_OK.
    LOG.info("Waiting for data rebalanced and HEALTH is OK")
    try:
        health_info = managed_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        managed_cluster.check.wait_ceph_health_status(
            timeout=ceph_health_timeout)

    show_step(5)
    # Define osd num which is down after disk crash
    LOG.info("Define osd ID for further work")
    osd_num = osds_down[0]
    osd_id_num = osd_num.split('.')[-1]
    LOG.info(f'OSD ID is {osd_num} and osd_id_num is {osd_id_num}')

    # Remove storage device from kaascephcluster
    LOG.info("Removing storage device from kaascephcluster")
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        new_crd = managed_cluster.get_miracephcluster()
        nodes = new_crd.read().spec['nodes']
        for node in nodes:
            if node['name'] == failed_disk_node_name:
                for dev in node['devices']:
                    if not failed_disk_full_path:
                        if dev['name'] == failed_disk_name:
                            node['devices'].remove(dev)
                            break
                    else:
                        if dev['fullpath'] == failed_disk_full_path:
                            node['devices'].remove(dev)
                            break
        new_crd.patch({'spec': {'nodes': nodes}})
        ceph_operation_yaml_path = settings.CEPH_OSDREMOVEREQUEST_YAML_PATH
        template = templates.render_template(ceph_operation_yaml_path)
        ceph_operation_conf = yaml.load(template, Loader=yaml.SafeLoader)
        osd_remove_info_map = {'approve': True,
                               'nodes': {failed_disk_node_name: {'cleanupByOsdId': [int(osd_id_num)]}}}
        ceph_operation_conf['spec'] = osd_remove_info_map
        ceph_request = managed_cluster.k8sclient.cephosdremoverequests.create(namespace='ceph-lcm-mirantis',
                                                                              body=ceph_operation_conf)
        ceph_request_name = ceph_request.data['metadata']['name']

        # Wait for phase CompletedWithWarnings - because disk is failed and cleanup job cant run
        LOG.info("Wait for CephOsdRemoveRequest complete actions")
        managed_cluster.check.wait_cephosdremoverequest_phase(name=ceph_request_name, namespace='ceph-lcm-mirantis',
                                                              expected_request_phase='Completed')
        LOG.info("CephOsdRemoveRequest has completed actions")
    else:
        new_crd = managed_cluster.get_cephcluster()
        nodeDevices = new_crd.read().spec['cephClusterSpec']['nodes'][failed_disk_node_name]['storageDevices']
        for dev in nodeDevices:
            if not failed_disk_full_path:
                if dev['name'] == failed_disk_name:
                    nodeDevices.remove(dev)
                    break
            else:
                if dev['fullPath'] == failed_disk_full_path:
                    nodeDevices.remove(dev)
                    break
        new_crd.patch(
            {'spec': {'cephClusterSpec': {'nodes': {failed_disk_node_name: {'storageDevices': nodeDevices}}}}})
        LOG.info("Waiting for miraceph spec nodes updated after kaascepcluster patch")
        managed_cluster.check.wait_miraceph_nodes_updated()
        ceph_operation_yaml_path = settings.CEPH_OPERATIONREQUEST_YAML_PATH
        template = templates.render_template(ceph_operation_yaml_path)
        ceph_operation_conf = yaml.load(template, Loader=yaml.SafeLoader)
        osd_remove_info_map = {'approve': True,
                               'nodes': {failed_disk_node_name: {'cleanupByOsdId': [int(osd_id_num)]}}}
        ceph_operation_conf['spec']['osdRemove'] = osd_remove_info_map
        ceph_request = cluster.k8sclient.kaas_cephoperationrequests.create(namespace=managed_ns_name,
                                                                           body=ceph_operation_conf)
        ceph_request_name = ceph_request.data['metadata']['name']

        # Wait for phase Completed
        # Since 2.23 operation and request phases should be Completed
        LOG.info("Wait for KaaSCephOperationRequest complete actions")
        cluster.check.wait_cephoperationrequest_state(name=ceph_request_name, namespace=managed_ns_name,
                                                      expected_request_phase='Completed',
                                                      expected_operation_phase='Completed',
                                                      operation_key='osdRemoveStatus')
        LOG.info("KaaSCephOperationRequest has completed actions")

    show_step(6)
    LOG.info("Emulate new disk installed")
    LOG.info("Rebooting node {}".format(failed_disk_node_name))
    ssh.reconnect()
    ssh.check_call("iptables -A INPUT -p tcp --dport 22 -j REJECT; "
                   "/sbin/shutdown -r -f now &>/dev/null & exit")
    LOG.info("Node is being rebooted.")
    waiters.wait_tcp(failed_disk_node_ip, port=22, timeout=600)
    LOG.info("Node is rebooted succesfully. Establishing new connection...")
    waiters.wait_ssh_available(host=failed_disk_node_ip, port=22, auth=auth, timeout=300, interval=30)
    ssh = exec_helpers.SSHClient(host=failed_disk_node_ip, port=22, auth=auth)
    ssh.logger.addHandler(logger.console)

    # Adding device back to spec
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Waiting for MiraCeph Ready")
        managed_cluster.check.wait_miracephhealth_state()
        show_step(7)
        LOG.info("Adding device back to MiraCeph")
        new_crd = managed_cluster.get_miracephcluster()
        new_crd_for_nodes = new_crd.read()
        nodes = new_crd_for_nodes.spec['nodes']
        for node in nodes:
            if node['name'] == failed_disk_node_name:
                node['devices'].append({'config': {'deviceClass': 'hdd'}, 'name': failed_disk_name})
        new_crd.patch({'spec': {'nodes': nodes}})
    else:
        LOG.info("Waiting for KaasCephCluster Ready")
        managed_cluster.check.wait_kaascephcluster_state()
        show_step(7)
        LOG.info("Adding device back to KaasCephCluster")
        new_crd = managed_cluster.get_cephcluster()
        devices = new_crd.read().spec['cephClusterSpec']['nodes'][failed_disk_node_name]['storageDevices']
        devices.append({'config': {'deviceClass': 'hdd'}, 'name': failed_disk_name})
        new_crd.patch(
            {'spec': {'cephClusterSpec': {'nodes': {failed_disk_node_name: {'storageDevices': devices}}}}})

    # Wait until rook-ceph-osd pod is spawned before checking WA
    osd_up_timeout = 600
    waiters.wait(lambda: [p for p in managed_cluster.k8sclient.pods.list(namespace="rook-ceph")
                          if f'rook-ceph-osd-{osd_id_num}' in p.name and 'rook-ceph-osd-prepare' not in p.name],
                 timeout=osd_up_timeout,
                 interval=20,
                 timeout_msg=f"Timeout for waiting pod rook-ceph-osd-{osd_id_num} after {osd_up_timeout} sec.")
    if cluster.workaround.prodx_44228():
        compare_keyrings_in_pod_and_cluster_and_fix_if_needed(managed_cluster, osd_id_num, target_node, ssh)

    show_step(8)
    # Wait for new osd appears in cluster
    LOG.info("Waiting for {} appears in ceph cluster".format(
        osd_num))
    waiters.wait(
        lambda: osd_num in [osd.get('name') for osd in yaml.safe_load(
            ceph_tools_pod.exec(cmd_osd_tree)).get('nodes')],
        timeout=ceph_health_timeout, interval=30,
        timeout_msg="{} is not existed in ceph cluster "
                    "after {} sec".format(osd_num, ceph_health_timeout))

    LOG.info("OSD {} existed in ceph cluster. Continue".format(osd_num))
    show_step(9)
    LOG.info("Check k8s pods in managed cluster")
    managed_cluster.check.check_k8s_pods()
    LOG.info("Waiting for data rebalanced and HEALTH is OK")
    try:
        health_info = managed_cluster.check.get_ceph_health_detail()
        assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                     f'Current ceph health status: {health_info}'
    except AssertionError:
        managed_cluster.check.wait_ceph_health_status(
            timeout=ceph_health_timeout)
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        managed_cluster.check.wait_miraceph_phase()
        managed_cluster.check.wait_miracephhealth_state()
    else:
        managed_cluster.check.wait_kaascephcluster_state()
    managed_cluster.check.check_ceph_pvc()
    managed_cluster.check.check_cluster_readiness()
    LOG.info("Everything seems to be fine. Congratulations!")


@pytest.mark.usefixtures("introspect_distribution_not_changed")
@pytest.mark.usefixtures("introspect_ceph_child")
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_method_time')
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_add_remove_additional_osds_by_path_by_id_by_device(kaas_manager):
    """Add spare disks to the Ceph cluster, then remove 4 OSDs in 4 different ways.

    One unused disk per Ceph node is added to the Ceph spec (MiraCeph or
    KaaSCephCluster, depending on the skip_kaascephcluster_usage workaround).
    After the new OSDs are spawned, 4 random OSDs located on different nodes
    are chosen, one for each supported removal method. For example:
    osd_dev: 'sdb'
    osd_path: '/dev/disk/by-path/pci-0000:00:1c.5'
    osd_id: 2
    osd_by_id: '/dev/disk/by-id/wwn-3495548383458640021103'
    The chosen devices are removed from the Ceph spec, then an OSD remove
    request is created and the test waits until it reaches the Completed phase,
    which means everything should be removed and cleaned up.

    The test is skipped when fewer than 4 disks can be added, when fewer than
    4 suitable OSDs (on different nodes having more than one device) exist, or
    when the removal would leave fewer OSDs than the minimal replication factor.
    """
    cluster = kaas_manager.get_mgmt_cluster()
    managed_ns_name = settings.TARGET_NAMESPACE
    managed_cluster_name = settings.TARGET_CLUSTER
    managed_ns = kaas_manager.get_namespace(managed_ns_name)
    managed_cluster = managed_ns.get_cluster(managed_cluster_name)
    ceph_tools_pod = managed_cluster.get_ceph_tool_pod()

    rook_ns = settings.ROOK_CEPH_NS
    ceph_health_timeout = 2400

    def ensure_ceph_health_ok():
        """Return at once if Ceph reports HEALTH_OK, otherwise wait for it."""
        try:
            health_info = managed_cluster.check.get_ceph_health_detail()
            assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                         f'Current ceph health status: {health_info}'
        except AssertionError:
            managed_cluster.check.wait_ceph_health_status(
                timeout=ceph_health_timeout)

    def count_osd_deployments():
        """Count rook-ceph-osd-<id> deployments; used for both the baseline and the wait condition."""
        return len([d for d in managed_cluster.k8sclient.deployments.list(namespace=rook_ns)
                    if 'rook-ceph-osd-' in d.name])

    # Check ceph health before proceed
    LOG.info("Waiting for Ceph health is HEALTH_OK")
    ensure_ceph_health_ok()
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCeph Ready before patching")
        managed_cluster.check.wait_miracephhealth_state()
        ceph_crd_data = managed_cluster.get_miracephcluster().data
        nodes_data_orig = ceph_crd_data['spec']['nodes']
        nodes_data = {}
        # transform miraceph nodes to kaasceph style, remove once kaasceph dropped
        for node in nodes_data_orig:
            nodes_data[node['name']] = node
    else:
        LOG.info("Check kaascephcluster Ready before patching")
        managed_cluster.check.wait_kaascephcluster_state()
        ceph_crd_data = managed_cluster.get_cephcluster().data
        nodes_data = ceph_crd_data['spec']['cephClusterSpec']['nodes']
    nodes_disks_map = {}  # node -> devices already listed in the Ceph spec
    available_disks = {}  # node -> the single disk that will be added
    all_disks = {}        # node -> candidate disks found on the machine
    current_osd_number = count_osd_deployments()
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        device_key = 'devices'
    else:
        device_key = 'storageDevices'
    for k, v in nodes_data.items():
        if device_key in v:
            nodes_disks_map[k] = []
            for d in v.get(device_key):
                if 'name' in d:
                    nodes_disks_map[k].append(d['name'])
                else:
                    if managed_cluster.workaround.skip_kaascephcluster_usage():
                        nodes_disks_map[k].append(d['fullpath'])
                    else:
                        nodes_disks_map[k].append(d['fullPath'])
            if managed_cluster.workaround.skip_kaascephcluster_usage():
                machine = managed_cluster.get_machine_by_k8s_name(name=k)
                if not machine:
                    raise Exception(f"Machine with K8S name {k} is not existed, but configured for ceph cluster")
                machineName = machine.name
            else:
                machine = kaas_manager.get_machine(name=k, namespace=managed_ns_name)
                if not machine:
                    raise Exception(f"Machine {k} is not existed, but configured for ceph cluster")
                machineName = k
            actual_disks = machine.data['status']['providerStatus']['hardware']['storage']
            # Exclude system disk from list.
            # A BMH without consumerRef cannot belong to this machine, so it is filtered out
            # instead of breaking the lookup.
            bmh = [b for b in kaas_manager.get_baremetalhosts(namespace=managed_ns_name) if
                   (b.data['spec'].get('consumerRef') or {}).get('name') == machineName]
            if not bmh:
                raise Exception(f"Something went wrong. No BMH available with consumerRef for machine {machineName}")
            bmh_data = bmh[0].data
            root_device = bmh_data['status']['provisioning']['rootDeviceHints']['deviceName'].split('/dev/')[1]
            exclude_devices = [root_device]
            # Exclude disks which are used for RAID

            ansible_extra = managed_ns.get_ansibleextra(
                name=machine.data['metadata']['annotations']['metal3.io/BareMetalHost'].split('/')[-1])
            storage_data = ansible_extra.data.get('spec', {}).get('target_storage', [])

            for disk in storage_data:
                if disk.get('type', '') == 'physical':
                    disk_name = disk['by_name'].split('/dev/')[1]
                    for schema in disk.get('partition_schema', []):
                        if 'raid' in schema.get('partflags', '') and disk_name not in exclude_devices:
                            exclude_devices.append(disk_name)
                elif disk.get('type', '') == 'md':
                    md_devices_names = [
                        dev.get('name', '').split('/dev/')[-1] for dev in disk.get('md_devices', [])]
                    exclude_devices.extend(md_devices_names)

            # Exclude all disks w/o serialNumber. These are virtual disks like dm-0, dm-1 etc.
            for disk in actual_disks:
                mach = re.search(r"([^\/]+$)", disk.get('name'))
                if mach and "serialNumber" not in disk:
                    exclude_devices.append(mach.group(0))
            # Collect short names (last path component) of the remaining disks
            disks_name = []
            for disk in actual_disks:
                LOG.debug("Machine {} - full Ceph disk name: {}".format(machineName, disk.get('name')))
                mach = re.search(r"([^\/]+$)", disk.get('name'))
                if mach and mach.group(0) not in exclude_devices:
                    disks_name.append(mach.group(0))
                    LOG.debug("Stored name: {}".format(mach.group(0)))
            all_disks[k] = disks_name
    for node_name, free_disks in all_disks.items():
        # NOTE(review): spec devices given by fullpath/fullPath are compared against short names
        # here, so they never match and are not filtered out - confirm this is acceptable.
        for disk in nodes_disks_map[node_name]:
            if disk in free_disks:
                free_disks.remove(disk)
        # Add one disk for every node
        if free_disks:
            available_disks[node_name] = free_disks[0]
    if not available_disks:
        msg = "No disks available for adding to ceph cluster. Skipping"
        LOG.info(msg)
        pytest.skip(msg)
    LOG.info(f"Next disks are available for adding to ceph cluster:\n{yaml.dump(available_disks)}")
    total_disks_number_to_add = len(available_disks)
    if total_disks_number_to_add < 4:
        msg = (f"4 or more disks are required for this test, but only {total_disks_number_to_add} found. "
               f"Skipping test")
        LOG.info(msg)
        pytest.skip(msg)
    total_osds_after_patch = current_osd_number + total_disks_number_to_add
    LOG.info("Waiting for Ceph health is HEALTH_OK")
    ensure_ceph_health_ok()
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCeph Ready before patching")
        managed_cluster.check.wait_miracephhealth_state(timeout=3600)
        ceph_crd = managed_cluster.get_miracephcluster()
        new_crd = ceph_crd.read()
        for k, v in available_disks.items():
            # All disks in ceph cluster should have similar device class(hdd or ssd)
            for node in new_crd.spec['nodes']:
                if node['name'] == k:
                    node[device_key].append({'config': {'deviceClass': 'hdd'}, 'name': v})
        ceph_crd.patch(body={'spec': new_crd.spec})
    else:
        LOG.info("Check KaasCephCluster Ready before patching")
        managed_cluster.check.wait_kaascephcluster_state(timeout=3600)
        ceph_crd = managed_cluster.get_cephcluster()
        new_crd = ceph_crd.read()
        for k, v in available_disks.items():
            # All disks in ceph cluster should have similar device class(hdd or ssd)
            new_crd.spec['cephClusterSpec']['nodes'][k]['storageDevices'].append(
                {'config': {'deviceClass': 'hdd'}, 'name': v})

        ceph_crd.patch(body={'spec': new_crd.spec})
    LOG.info("Wait for new osds are spawned")
    # NOTE: the count embedded into timeout_msg is evaluated once, before waiting starts,
    # so on timeout it shows the initial number of deployments rather than the latest one.
    waiters.wait(lambda: count_osd_deployments() == total_osds_after_patch,
                 timeout=ceph_health_timeout, interval=30,
                 timeout_msg="osd pods number is not equal to osd disks count "
                             "in ceph cluster. Current osd pods number is {}. "
                             "But shoud be {}".format(count_osd_deployments(), total_osds_after_patch))
    LOG.info("All pods are spawned")
    managed_cluster.check.check_k8s_pods()
    LOG.info("Waiting for Ceph health is HEALTH_OK")
    ensure_ceph_health_ok()
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCeph Ready before patching")
        managed_cluster.check.wait_miracephhealth_state(timeout=3600)
        LOG.info("Check MiraCeph cluster will satisfy minimal replication factor requirement after osds removal")
        ceph_osds_total = managed_cluster.get_miracephcluster_osd_number()
        ceph_min_replica_size = managed_cluster.get_miracephcluster_min_replica_size()
    else:
        LOG.info("Check KaasCephCluster Ready before patching")
        managed_cluster.check.wait_kaascephcluster_state(timeout=3600)
        LOG.info("Check KaasCephCluster will satisfy minimal replication factor requirement after osds removal")
        ceph_osds_total = managed_cluster.get_kaascephcluster_osd_number()
        ceph_min_replica_size = managed_cluster.get_kaascephcluster_min_replica_size()
    if ceph_osds_total - 4 < ceph_min_replica_size:
        msg = (f"After osds remove Ceph cluster will have only {ceph_osds_total - 4} osds "
               f"which is not sufficient for a minimal replication factor equal to {ceph_min_replica_size}. "
               f"Skipping test")
        LOG.info(msg)
        pytest.skip(msg)
    # Create template for disks removing.
    LOG.info("Prepare template for osd removing")
    # Get 4 random osds from different nodes to check: remove by device name, by path, by osd id
    # and by by-id path simultaneously
    osds_to_remove = []
    osd_pods_all = [p for p in managed_cluster.k8sclient.pods.list(namespace=rook_ns) if
                    'rook-ceph-osd-' in p.name and 'rook-ceph-osd-prepare' not in p.name]
    uniq_osds = []
    pod_nodes = []

    # Filter pods from same nodes, and also take nodes only with 2 or more disks
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        filter_ceph_crd = managed_cluster.get_miracephcluster().read()
        ceph_operation_yaml_path = settings.CEPH_OSDREMOVEREQUEST_YAML_PATH
    else:
        filter_ceph_crd = managed_cluster.get_cephcluster().read()
        ceph_operation_yaml_path = settings.CEPH_OPERATIONREQUEST_YAML_PATH
    for po in osd_pods_all:
        if managed_cluster.workaround.skip_kaascephcluster_usage():
            node_spec_name = po.data['spec']['node_name']
            for node in filter_ceph_crd.spec['nodes']:
                if node['name'] == node_spec_name:
                    if node_spec_name not in pod_nodes and len(node['devices']) > 1:
                        pod_nodes.append(node_spec_name)
                        uniq_osds.append(po)
                    break
        else:
            pod_node = po.data['spec']['node_name']
            pod_machine_name = [m.name for m in kaas_manager.get_lcmmachines(namespace=managed_ns_name) if
                                pod_node in [host['address'] for host in m.data['status']['addresses']]]
            if pod_machine_name:
                pod_machine_name = pod_machine_name[0]
            else:
                raise Exception(f"No lcm-machines found in child cluster {managed_ns_name}")
            if pod_node not in pod_nodes and len(
                    filter_ceph_crd.spec['cephClusterSpec']['nodes'][pod_machine_name]['storageDevices']) > 1:
                pod_nodes.append(pod_node)
                uniq_osds.append(po)
    # random.sample() raises ValueError when the population is smaller than the sample size,
    # so report the unmet precondition explicitly, like the other precondition checks above.
    if len(uniq_osds) < 4:
        msg = (f"4 osds on different nodes with 2 or more disks are required for this test, "
               f"but only {len(uniq_osds)} found. Skipping test")
        LOG.info(msg)
        pytest.skip(msg)
    random_osds = random.sample(uniq_osds, 4)
    template = templates.render_template(ceph_operation_yaml_path)
    ceph_operation_conf = yaml.load(template, Loader=yaml.SafeLoader)
    cmd_osd_dev = ['/bin/sh', '-c', 'ceph device ls -f json']
    ceph_device_data = yaml.safe_load(ceph_tools_pod.exec(cmd_osd_dev))
    for pod in random_osds:
        pod_name = pod.name
        # Pod name looks like rook-ceph-osd-<id>-..., so the osd id is the 4th dash-separated item
        osd_id = pod_name.split('-')[3]
        pod_node = pod.data['spec']['node_name']
        # Match the osd daemon anywhere in the device's daemon list, not only at the first position
        osd_daemon = 'osd.' + osd_id
        osd_device = [p for p in ceph_device_data if osd_daemon in p['daemons']][0]
        osd_dev = osd_device['location'][0]['dev']
        osd_devid = osd_device['devid']
        pod_machine_name = [m.name for m in kaas_manager.get_lcmmachines(namespace=managed_ns_name) if
                            pod_node in [host['address'] for host in m.data['status']['addresses']]][0]
        osd_machine = managed_ns.get_machine(pod_machine_name).data
        # Hardware record of the machine whose by-id links contain the ceph device id
        osd_storage = [storage for storage in osd_machine['status']['providerStatus']['hardware']['storage']
                       if any(osd_devid in disk_id for disk_id in storage.get('byIDs', []))][0]
        osd_dev_by_id = osd_storage['byID']
        osd_path = osd_storage['byPath']
        if managed_cluster.workaround.skip_kaascephcluster_usage():
            nodeSpecName = pod_node
        else:
            nodeSpecName = pod_machine_name
        osds_to_remove.append({nodeSpecName: {'osd_id': osd_id,
                                              'osd_path': osd_path,
                                              'osd_dev': osd_dev,
                                              'osd_by_id': osd_dev_by_id}})

    LOG.info(f"Next osds will be removed from cluster: \n{yaml.dump(osds_to_remove)}")
    LOG.info("Waiting for Ceph health is HEALTH_OK")
    ensure_ceph_health_ok()
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCeph Ready before patching")
        managed_cluster.check.wait_miracephhealth_state(timeout=3600)
        new_crd = managed_cluster.get_miracephcluster()
        new_ceph_crd = new_crd.read()
    else:
        LOG.info("Check KaasCephCluster Ready before patching")
        managed_cluster.check.wait_kaascephcluster_state(timeout=3600)
        new_crd = managed_cluster.get_cephcluster()
        new_ceph_crd = new_crd.read()

    # Config data for 4 different ways for deletion
    def remove_case(idx, v):
        """Return the remove-request node entry for removal method number idx (0..3)."""
        remove_cases = {
            0: {'cleanupByDevice': [{'name': str(v.get('osd_dev'))}]},
            1: {'cleanupByDevice': [{'path': str(v.get('osd_path'))}]},
            2: {'cleanupByOsdId': [int(v.get('osd_id'))]},
            3: {'cleanupByDevice': [{'path': str(v.get('osd_by_id'))}]}
        }
        return remove_cases[idx]

    def remove_device(ceph_devices, osd_info):
        """Return ceph_devices without the entries that describe the osd from osd_info.

        A new list is built instead of removing items from the list being iterated,
        which would skip the element following every removed one.
        """
        osd_paths = (osd_info.get('osd_path'), osd_info.get('osd_by_id'))

        def is_osd_device(dev):
            if 'name' in dev:
                return dev['name'] == osd_info.get('osd_dev')
            if 'fullpath' in dev:
                return dev['fullpath'] in osd_paths
            if 'fullPath' in dev:
                return dev['fullPath'] in osd_paths
            return False

        return [dev for dev in ceph_devices if not is_osd_device(dev)]

    # Each of the 4 selected osds gets its own removal method, chosen by its position
    for idx, osd_entry in enumerate(osds_to_remove):
        for k, v in osd_entry.items():
            if managed_cluster.workaround.skip_kaascephcluster_usage():
                ceph_operation_conf['spec']['nodes'][k] = remove_case(idx, v)
                for node in new_ceph_crd.spec['nodes']:
                    if node['name'] == k:
                        node['devices'] = remove_device(node['devices'], v)
                        break
            else:
                ceph_operation_conf['spec']['osdRemove']['nodes'][k] = remove_case(idx, v)
                node_spec = new_ceph_crd.spec['cephClusterSpec']['nodes'][k]
                node_spec['storageDevices'] = remove_device(node_spec['storageDevices'], v)

    # Remove osds from the Ceph spec (MiraCeph or KaaSCephCluster)
    new_crd.patch(body={'spec': new_ceph_crd.spec})
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        # Create cephosdremoverequest
        ceph_request = managed_cluster.k8sclient.cephosdremoverequests.create(namespace='ceph-lcm-mirantis',
                                                                              body=ceph_operation_conf)
        ceph_request_name = ceph_request.data['metadata']['name']
        LOG.info(f"Request {ceph_request_name} created. Waiting for request completed")
        managed_cluster.check.wait_cephosdremoverequest_phase(name=ceph_request_name, namespace='ceph-lcm-mirantis',
                                                              expected_request_phase='Completed')
    else:
        LOG.info("Waiting for miraceph spec nodes updated after kaascepcluster patch")
        managed_cluster.check.wait_miraceph_nodes_updated()
        # Create kaascephoperationrequest
        ceph_request = cluster.k8sclient.kaas_cephoperationrequests.create(namespace=managed_ns_name,
                                                                           body=ceph_operation_conf)
        ceph_request_name = ceph_request.data['metadata']['name']
        LOG.info(f"Request {ceph_request_name} created. Waiting for request completed")
        cluster.check.wait_cephoperationrequest_state(name=ceph_request_name, namespace=managed_ns_name,
                                                      expected_request_phase='Completed',
                                                      expected_operation_phase='Completed',
                                                      operation_key='osdRemoveStatus')
    LOG.info(f"Request {ceph_request_name} has completed actions")
    LOG.info("Check ceph health status and pods Running")
    ensure_ceph_health_ok()
    managed_cluster.check.check_k8s_pods()
    LOG.info("Ceph is healthy, pods are running")
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCeph Ready")
        managed_cluster.check.wait_miraceph_phase()
        managed_cluster.check.wait_miracephhealth_state(timeout=3600)
    else:
        LOG.info("Check kaascephcluster Ready")
        managed_cluster.check.wait_kaascephcluster_state(timeout=3600)
    managed_cluster.check.check_ceph_pvc()
    managed_cluster.check.check_cluster_readiness()
    LOG.info("Everything seems to be fine. Congratulations!")


@pytest.mark.usefixtures("introspect_distribution_not_changed")
@pytest.mark.usefixtures("introspect_ceph_child")
@pytest.mark.usefixtures("collect_downtime_statistics")     # Should be used if ALLOW_WORKLOAD == True
@pytest.mark.usefixtures('log_method_time')
@pytest.mark.usefixtures("check_ceph_keyrings")
def test_remove_ceph_osd_node(kaas_manager, show_step):
    """This test removes osd node from already created ceph cluster.

    Scenario:
       1. Get information about ceph cluster using ceph tools pod.
       2. Define node for downscaling
       3. Remove node from ceph cluster
       4. Create remove request with completeCleanUp: true key
       5. Wait request Completed
       6. Wait ceph health OK
       7. Delete machine
       8. Delete bmh
       9. Wait ceph health OK
    """
    # NOTE(review): the docstring above is intentionally kept verbatim; show_step
    # presumably resolves step numbers against the "Scenario" list - confirm before
    # rewording it.
    #
    # Two flows are supported, selected by
    # managed_cluster.workaround.skip_kaascephcluster_usage():
    #   * truthy -> the MiraCeph object is patched directly and a
    #     CephOsdRemoveRequest is created in the managed cluster;
    #   * falsy  -> the KaaSCephCluster object is patched and a
    #     KaaSCephOperationRequest is created through the management cluster.

    cluster = kaas_manager.get_mgmt_cluster()
    managed_ns = kaas_manager.get_namespace(settings.TARGET_NAMESPACE)
    managed_cluster = managed_ns.get_cluster(settings.TARGET_CLUSTER)
    ceph_health_timeout = 2400
    cmd_status = ['/bin/sh', '-c', 'ceph -s -f json']

    def _wait_ceph_health_ok():
        # Fast path: a single health query. If the status is not HEALTH_OK (or the
        # query itself raises AssertionError) fall back to the polling waiter.
        try:
            health_info = managed_cluster.check.get_ceph_health_detail()
            assert health_info['status'] == "HEALTH_OK", f'Health is not OK. Will not proceed. ' \
                                                         f'Current ceph health status: {health_info}'
        except AssertionError:
            managed_cluster.check.wait_ceph_health_status(
                timeout=ceph_health_timeout)

    def _get_num_osds(tools_pod):
        # 'ceph -s -f json' output is JSON; it is parsed with the YAML loader the
        # module already imports.
        return yaml.safe_load(tools_pod.exec(cmd_status)).get('osdmap').get('num_osds')

    def _count_osd_pods():
        # Count OSD daemon pods only; 'osd-prepare' pods share the name prefix and
        # must be excluded both in the wait condition and in the reported number.
        return len([pod for pod in managed_cluster.k8sclient.pods.list(namespace='rook-ceph')
                    if 'rook-ceph-osd-' in pod.name and 'osd-prepare' not in pod.name])

    # Check ceph health before proceed
    show_step(1)
    _wait_ceph_health_ok()
    LOG.info("Ceph health is HEALTH_OK. Continue...")
    ceph_tools_pod = managed_cluster.get_ceph_tool_pod()

    show_step(2)
    machines_for_delete = [machine for machine in managed_cluster.get_machines()
                           if 'nodeforscale' in machine.name]
    # Fail with a clear message instead of a bare IndexError.
    assert machines_for_delete, "No machine with 'nodeforscale' in its name found in the cluster"
    machine = machines_for_delete[0]
    disk_count = 0
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Check MiraCeph Ready before patching")
        machine_name_in_spec = machine.get_k8s_node_name()
        managed_cluster.check.wait_miracephhealth_state(timeout=3600)
        show_step(3)
        LOG.info("Patching MiraCeph CRD")
        ceph_crd = managed_cluster.get_miracephcluster()
        new_crd = ceph_crd.read()
        for node in new_crd.spec['nodes']:
            if node['name'] == machine_name_in_spec:
                disk_count = len(node['devices'])
                break
        else:
            # Without this guard disk_count stays 0, the expected OSD number equals
            # the current one and the OSD checks below become meaningless.
            raise AssertionError(
                f"Node {machine_name_in_spec} is not found in MiraCeph spec nodes")
    else:
        LOG.info("Check KaasCephCluster Ready before patching")
        machine_name_in_spec = machine.name
        managed_cluster.check.wait_kaascephcluster_state(timeout=3600)
        show_step(3)
        LOG.info("Patching miraceph cephcluster CRD")
        ceph_crd = managed_cluster.get_cephcluster()
        new_crd = ceph_crd.read()
        disk_count = len(
            new_crd.spec['cephClusterSpec']['nodes'][machine_name_in_spec]['storageDevices'])
    LOG.info("Check if HEALTH_OK before patching CRD")
    # Get osds num before patch and calculate num after
    num_osds_before_patch = int(_get_num_osds(ceph_tools_pod))
    LOG.info(f"OSDs before patch: {num_osds_before_patch}")
    num_osds_after_patch = num_osds_before_patch - disk_count
    LOG.info(f"OSDs after patch: {num_osds_after_patch}")
    LOG.info("Machine for delete: {}".format(machine_name_in_spec))
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        LOG.info("Patching MiraCeph CRD")
        # Re-read the object so the patch is built from its current node list.
        new_nodes = [node for node in ceph_crd.read().spec['nodes']
                     if node['name'] != machine_name_in_spec]
        ceph_crd.patch({'spec': {'nodes': new_nodes}})
        show_step(4)
        ceph_operation_yaml_path = settings.CEPH_OSDREMOVEREQUEST_YAML_PATH
        template = templates.render_template(ceph_operation_yaml_path)
        ceph_operation_conf = yaml.load(template, Loader=yaml.SafeLoader)
        node_remove_map = {'approve': True,
                           'nodes': {machine_name_in_spec: {'completeCleanUp': True}}}
        # The whole spec of the rendered template is replaced here.
        ceph_operation_conf['spec'] = node_remove_map
        ceph_request = managed_cluster.k8sclient.cephosdremoverequests.create(namespace='ceph-lcm-mirantis',
                                                                              body=ceph_operation_conf)
        ceph_request_name = ceph_request.data['metadata']['name']

        show_step(5)
        LOG.info("Wait for CephOsdRemoveRequest complete actions")
        managed_cluster.check.wait_cephosdremoverequest_phase(name=ceph_request_name, namespace='ceph-lcm-mirantis',
                                                              expected_request_phase='Completed')
        LOG.info("CephOsdRemoveRequest has completed actions")
    else:
        LOG.info("Patching KaasCephCluster CRD")
        # Setting the node key to None in the patch body drops the node entry.
        ceph_crd.patch({'spec': {'cephClusterSpec': {'nodes': {machine_name_in_spec: None}}}})
        LOG.info("Waiting for miraceph spec nodes updated after kaascepcluster patch")
        managed_cluster.check.wait_miraceph_nodes_updated()
        show_step(4)
        ceph_operation_yaml_path = settings.CEPH_OPERATIONREQUEST_YAML_PATH
        template = templates.render_template(ceph_operation_yaml_path)
        ceph_operation_conf = yaml.load(template, Loader=yaml.SafeLoader)
        node_remove_map = {'approve': True,
                           'nodes': {machine_name_in_spec: {'completeCleanUp': True}}}
        # Only the 'osdRemove' section of the rendered template spec is set here.
        ceph_operation_conf['spec']['osdRemove'] = node_remove_map
        ceph_request = cluster.k8sclient.kaas_cephoperationrequests.create(namespace=settings.TARGET_NAMESPACE,
                                                                           body=ceph_operation_conf)
        ceph_request_name = ceph_request.data['metadata']['name']

        show_step(5)
        LOG.info("Wait for ceph operation request complete actions")
        cluster.check.wait_cephoperationrequest_state(name=ceph_request_name, namespace=settings.TARGET_NAMESPACE,
                                                      expected_request_phase='Completed',
                                                      expected_operation_phase='Completed',
                                                      operation_key='osdRemoveStatus')
        LOG.info("KaaSCephOperationRequest has completed actions")
    LOG.info("Waiting for OSDs number is decreased in ceph cluster")
    # Fetch the tools pod again before polling through it.
    ceph_tools_pod = managed_cluster.get_ceph_tool_pod()
    # NOTE(review): timeout_msg is formatted before waiting starts, so the
    # "current" number in it is a pre-wait snapshot, not the value at timeout.
    waiters.wait(
        lambda: _get_num_osds(ceph_tools_pod) == num_osds_after_patch,
        timeout=ceph_health_timeout, interval=30,
        timeout_msg="OSDs number doesn't match expected number after {} sec. "
                    "Current osd number is {}. But shoud be {}".format(
            ceph_health_timeout,
            _get_num_osds(ceph_tools_pod),
            num_osds_after_patch))

    # Wait for HEALTH_OK after osds were deleted
    show_step(6)
    LOG.info("Check ceph cluster status")
    _wait_ceph_health_ok()
    LOG.info("Ceph cluster is healthy. Congratulations!")
    LOG.info("Waiting for osd pods number is decreased "
             "according to disks count")
    # The reported pod count uses the same filter as the wait condition
    # (osd-prepare pods excluded); it is still a pre-wait snapshot, see above.
    waiters.wait(
        lambda: _count_osd_pods() == num_osds_after_patch,
        timeout=ceph_health_timeout, interval=30,
        timeout_msg="osd pods number is not equal to osd disks count "
                    "in ceph cluster. Current osd pods number is {}. "
                    "But shoud be {}".format(
            _count_osd_pods(),
            num_osds_after_patch))

    show_step(7)
    LOG.info("Delete machine")
    m_for_delete = managed_cluster.get_machine(name=machine.name)
    machine_deletion_policy_enabled = cluster.machine_deletion_policy_enabled()
    if machine_deletion_policy_enabled:
        check_machine_unsafe_delete(cluster, m_for_delete, wait_deletion_timeout=3600)
    else:
        m_for_delete.delete()
        cluster.check.check_deleted_node(m_for_delete.name)

    bmh_for_delete = [bmh for bmh in managed_ns.get_baremetalhosts()
                      if 'nodeforscale' in bmh.name]
    # Fail with a clear message instead of a bare IndexError.
    assert bmh_for_delete, "No baremetalhost with 'nodeforscale' in its name found in the namespace"
    bmh = bmh_for_delete[0]
    managed_cluster.wait_machine_deletion(machine.name, interval=60)
    # The host is expected to reach 'ready' once its machine is gone; only then
    # the BMH object itself is deleted.
    managed_ns.wait_baremetalhosts_statuses(
        nodes=bmh.name,
        wait_status='ready',
        retries=30,
        interval=60)
    show_step(8)
    managed_ns.delete_baremetalhost(name=bmh.name)
    managed_ns.wait_baremetalhost_deletion(bmh.name, wait_bmh_cred=True)
    LOG.info("Next BMHs were deleted succesfully: {}".format(
        bmh_for_delete))

    time.sleep(5)

    show_step(9)
    _wait_ceph_health_ok()
    LOG.info("Check ceph cluster status")
    if managed_cluster.workaround.skip_kaascephcluster_usage():
        managed_cluster.check.wait_miraceph_phase()
        managed_cluster.check.wait_miracephhealth_state(timeout=2700)
    else:
        managed_cluster.check.wait_kaascephcluster_state(timeout=2700)
    managed_cluster.check.check_ceph_pvc()
    managed_cluster.check.check_cluster_readiness()
    LOG.info("Ceph cluster is healthy. Congratulations!")

    # regarding https://mirantis.jira.com/browse/PRODX-31807
    LOG.info("Sleep 6 minutes for SL alert stabilization after node deletion")
    time.sleep(6 * 60)
