#    Copyright 2025 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import cachetools.func as cachetools_func
import copy
from deepdiff import DeepDiff

import pytest
import re
import time
import yaml

from kubernetes.client.exceptions import ApiException as k8sApiException
from si_tests import logger
from si_tests import settings
from si_tests.utils import templates
from si_tests.utils import exceptions as timeout_exceptions
from si_tests.managers.netchecker_manager import NetcheckerManager
from si_tests.utils import utils, waiters

LOG = logger.logger


class NetcheckerNegativeBase:
    """Base class for NetChecker negative tests."""

    @pytest.fixture(autouse=True)
    def setup(self, kaas_manager):
        """Prepare per-test state: settings-derived names, cluster handles and fake object names.

        Declared with ``autouse=True``, so it needs no explicit request from the tests.
        """
        # Values taken from the test settings
        self.cluster_ns_name = settings.TARGET_NAMESPACE
        self.cluster_name = settings.TARGET_CLUSTER
        self.inventory_config_name = settings.NETCHECKER_INVENTORY_CONFIG_NAME
        self.targets_config_name = settings.NETCHECKER_TARGETS_CONFIG_NAME
        self.netchecker_namespace = settings.NETCHECKER_NAMESPACE
        self.netchecker_config_path = settings.NETCHECKER_FILE_PATH
        self.netchecker_obj_name = settings.NETCHECKER_OBJECT_NAME

        # Names of the temporary objects created by the tests
        self.fake_l2template_name = 'si-test-fake-l2template'
        self.fake_subnet_name = 'si-test-fake-subnet'

        # Handles to the namespace, the target cluster and its netchecker manager
        self.ns = kaas_manager.get_namespace(self.cluster_ns_name)
        self.cluster = self.ns.get_cluster(self.cluster_name)
        self.netchecker = NetcheckerManager(self.cluster)

        # Filled lazily by get_or_create_netchecker_object()
        self._netchecker_object = None

    @property
    @cachetools_func.ttl_cache(ttl=300)
    def k8sclient(self):
        """Return ``self.cluster.k8sclient``, memoized through ``ttl_cache(ttl=300)``."""
        # NOTE(review): the cache wraps an instance method, so it is presumably keyed on ``self``
        # and keeps test instances referenced until the entry expires — confirm it is needed.
        return self.cluster.k8sclient

    @property
    def prometheusclient(self):
        """Return the ``prometheusclient`` attribute of the target cluster."""
        target_cluster = self.cluster
        return target_cluster.prometheusclient

    @property
    def infraconnectivitymonitors(self):
        """Return the ``infraconnectivitymonitors`` attribute of the netchecker manager."""
        manager = self.netchecker
        return manager.infraconnectivitymonitors

    @property
    def ipam_subnets(self):
        """Return ``ipam_subnets`` from the API object of the cluster's manager."""
        api = self.cluster._manager.api
        return api.ipam_subnets

    @property
    def l2templates(self):
        """Return ``l2templates`` from the API object of the cluster's manager."""
        api = self.cluster._manager.api
        return api.l2templates

    @staticmethod
    def perform_cluster_health_checks(cluster,
                                      check_cluster_nodes_timeout=1800,
                                      check_k8s_nodes_timeout=360,
                                      check_cluster_readiness_timeout=settings.CHECK_CLUSTER_READINESS_TIMEOUT):
        """Run the cluster health checks in order: cluster nodes, k8s nodes, cluster readiness.

        Each ``*_timeout`` argument is forwarded as ``timeout`` to the matching check of
        ``cluster.check``.
        """
        checker = cluster.check
        checker.check_cluster_nodes(timeout=check_cluster_nodes_timeout)
        checker.check_k8s_nodes(timeout=check_k8s_nodes_timeout)
        checker.check_cluster_readiness(timeout=check_cluster_readiness_timeout)

    @pytest.fixture
    def l2t(self):
        """Return the l2template object referenced by the first machine of the cluster."""
        first_machine = self.cluster.get_machines()[0]
        template_name = first_machine.l2template_name
        return self.cluster._manager.get_l2template(template_name, namespace=self.cluster_ns_name)

    @pytest.fixture
    def prepare_fake_objects(self, l2t):
        """Create (or reuse) a fake subnet and a fake l2template; remove them on teardown.

        The fake l2template is built from a copy of the spec of ``l2t`` whose ``l3Layout``
        is replaced by a single entry selecting the fake subnet. On teardown the fake
        l2template is deleted, then every IP address whose ``spec.subnetRef`` mentions the
        fake subnet, then the fake subnet itself.
        """
        # Subnet is required to correctly modify l2template to use this subnet and check netchecker changed
        fake_subnet_name = self.fake_subnet_name
        fake_l2t_name = self.fake_l2template_name
        namespace = self.cluster.namespace
        is_subnet_exists = self.ipam_subnets.present(name=fake_subnet_name, namespace=namespace)
        is_l2t_exists = self.l2templates.present(name=fake_l2t_name, namespace=namespace)
        if not is_subnet_exists:
            fake_cidr = '10.0.0.240/28'
            subnet_labels = {
                'cluster.sigs.k8s.io/cluster-name': f'{self.cluster.name}',
                'kaas.mirantis.com/provider': 'baremetal',
                'fake-subnet-label': 'present'
            }
            test_subnet = self.ns.create_ipam_subnet(name=fake_subnet_name, cidr=fake_cidr, labels=subnet_labels)
        else:
            LOG.info(f"Subnet with name {fake_subnet_name} already exists")
            test_subnet = self.ipam_subnets.get(name=fake_subnet_name, namespace=namespace)
        if not is_l2t_exists:
            fake_l2t_labels = {'kaas.mirantis.com/provider': 'baremetal',
                               'si-test-fake-label': 'present',
                               'cluster.sigs.k8s.io/cluster-name': f'{self.cluster.name}'}
            # Work on a private copy: the dict returned by l2t.data must not be modified,
            # other fixtures and tests read the original spec from the same object.
            fake_l2t_spec = copy.deepcopy(l2t.data.get('spec', {}))
            fake_l2t_spec['l3Layout'] = [
                {'labelSelector': {'kaas.mirantis.com/provider': 'baremetal', 'fake-subnet-label': 'present'},
                 'scope': 'namespace',
                 'subnetName': fake_subnet_name}]

            l2t_obj = self.ns.create_l2template(name=fake_l2t_name,
                                                np_template={},
                                                labels=fake_l2t_labels,
                                                spec=fake_l2t_spec)
        else:
            LOG.info(f"L2 template with name {fake_l2t_name} already exists")
            l2t_obj = self.l2templates.get(name=fake_l2t_name, namespace=namespace)
        yield
        LOG.info(f"Removing {fake_l2t_name} l2template")
        l2t_obj.delete()
        # Addresses allocated from the fake subnet are removed before the subnet itself
        addrs = self.ns.get_ipam_ipaddrs()
        addr_to_remove = [addr for addr in addrs if fake_subnet_name in addr.data.get(
            'spec', {}).get('subnetRef', '')]
        for addr in addr_to_remove:
            ip = addr.data.get('metadata', {}).get('labels', {}).get('ipam/IP', '')
            addr.delete()
            LOG.info(f"IPaddress {ip} removed ")
        LOG.info(f"Removing subnet {fake_subnet_name}")
        test_subnet.delete()

    def get_or_create_netchecker_object(self):
        """Return the InfraConnectivityMonitor of the cluster namespace, creating it if absent.

        The object is remembered in ``self._netchecker_object`` and returned directly on
        later calls. If monitors already exist in the namespace, exactly one is expected
        and it is reused; otherwise one is created from the rendered template file.
        """
        if self._netchecker_object:
            return self._netchecker_object

        found = self.infraconnectivitymonitors.list(namespace=self.cluster.namespace)
        if found:
            assert len(found) == 1, (f"Should be denied to have several netcheckers for one cluster. "
                                     f"Current netcheckers:\n{found}")
            present = found[0]
            LOG.info(f"Object exists with name: {present.name}")
            self._netchecker_object = present
            return self._netchecker_object

        # Nothing exists yet: render the template and create the object
        rendered = templates.render_template(
            self.netchecker_config_path,
            options={
                "TARGET_CLUSTER": self.cluster.name,
                "TARGET_NAMESPACE": self.cluster.namespace,
                "NETCHECKER_OBJECT_NAME": self.netchecker_obj_name
            })
        body = yaml.safe_load(rendered)
        LOG.info("Creating netchecker object")
        self._netchecker_object = self.netchecker.create_infraconnectivitymonitor(data=body)
        return self._netchecker_object

    def assert_creation_denied(self, data, expected_substrings, context=""):
        """Assert that creating an InfraConnectivityMonitor from ``data`` is rejected by the API.

        :param data: body passed to ``infraconnectivitymonitors.create``
        :param expected_substrings: strings that must all be present in the error body
        :param context: prefix used in log and failure messages
        :raises AssertionError: when the error body lacks one of the expected substrings

        The test is failed through ``pytest.fail`` when the creation succeeds.
        """
        try:
            self.infraconnectivitymonitors.create(namespace=self.cluster.namespace, body=data)
        except k8sApiException as e:
            # The body may be absent or raw bytes; normalise it so substring checks cannot raise TypeError
            body = e.body
            if isinstance(body, bytes):
                body = body.decode('utf-8', errors='replace')
            body = body or ''
            for substr in expected_substrings:
                if substr not in body:
                    raise AssertionError(f"{context}: Expected error message containing '{substr}' not "
                                         f"found.\nActual: {body}")
            LOG.info(f"{context}: Creation denied as expected with error: {body}")
        else:
            # Kept outside of ``try`` so that an API error raised while listing objects
            # is never mistaken for the expected denial of the create request
            all_netcheckers = self.infraconnectivitymonitors.list_all()
            pytest.fail(f"{context}: Netchecker should not have been created.\nAll netcheckers:\n{all_netcheckers}")

    def _check_expected_nodes_monitored(self, expected_nodes, netchecker_object):
        """Tell whether inventory and targets statuses both list exactly ``expected_nodes``.

        Node names are taken from ``status.inventoryConfigStatus.nodes`` and
        ``status.targetsConfigStatus.nodes``; entries with an empty ``nodeName`` are ignored.
        Returns True on a match, otherwise logs a warning with all three sets and returns False.
        """
        def collect_names(entries):
            # Set of non-empty node names found in a status node list
            return {entry.get('nodeName', '') for entry in entries if entry.get('nodeName', '')}

        inventory_entries = netchecker_object.data.get(
            'status', {}).get('inventoryConfigStatus', {}).get('nodes', [])
        targets_entries = netchecker_object.data.get(
            'status', {}).get('targetsConfigStatus', {}).get('nodes', [])
        in_inventory = collect_names(inventory_entries)
        in_targets = collect_names(targets_entries)

        mismatch_report = ("Nodes in netchecker object are not as expected.\nExpected nodes:\n"
                           f"{yaml.dump(list(expected_nodes))}\nNodes in inventory status:\n"
                           f"{yaml.dump(list(in_inventory))}\nNodes in targets status:\n"
                           f"{yaml.dump(list(in_targets))}")

        if not (expected_nodes == in_inventory == in_targets):
            LOG.warning(mismatch_report)
            return False
        return True


@pytest.mark.usefixtures('netchecker_cleanup_actions')
class TestNetcheckerNegativeAdmission(NetcheckerNegativeBase):
    """Class for NetChecker negative tests related to admission rules"""

    def test_create_second_netchecker(self, show_step):
        """Create second netchecker for same cluster.

        Scenario:
            1. Check cluster readiness
            2. Create first netchecker
            3. Create second netchecker with different name for same cluster
        """

        show_step(1)
        self.perform_cluster_health_checks(cluster=self.cluster)

        show_step(2)
        existing = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(existing)
        # Same target cluster, but a unique name derived from the first object
        duplicate_name = '-'.join((existing.name, utils.gen_random_string(3)))
        duplicate_metadata = {
            'name': duplicate_name,
            'namespace': self.cluster.namespace
        }
        duplicate_body = {
            'apiVersion': 'kaas.mirantis.com/v1alpha1',
            'kind': 'InfraConnectivityMonitor',
            'metadata': duplicate_metadata,
            'spec': {
                'targetCluster': self.cluster.name
            }
        }

        show_step(3)
        denial_markers = ["admission webhook", "denied the request", "already exist"]
        self.assert_creation_denied(data=duplicate_body,
                                    expected_substrings=denial_markers,
                                    context="Multiple netcheckers creation")

    def test_create_netchecker_for_wrong_cluster(self, show_step):
        """Create second netchecker for wrong cluster.

        Scenario:
            1. Check cluster readiness
            2. Create netchecker with wrong cluster name
        """

        show_step(1)
        self.perform_cluster_health_checks(cluster=self.cluster)
        # A cluster name that is not expected to exist: real name plus a random suffix
        missing_cluster = '-'.join((self.cluster.name, utils.gen_random_string(3)))

        rendered = templates.render_template(self.netchecker_config_path)
        body = yaml.safe_load(rendered)
        body['spec']['targetCluster'] = missing_cluster

        show_step(2)
        denial_markers = ["admission webhook", "denied the request", "unable to get cluster", "not found"]
        self.assert_creation_denied(data=body,
                                    expected_substrings=denial_markers,
                                    context="Wrong cluster name")

    def test_unsupported_specs(self, show_step):
        """Create netchecker with unsupported specs.

        Scenario:
            1. Check cluster readiness
            2. Create netchecker with wrong spec
        """

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        netchecker_data = yaml.safe_load(
            templates.render_template(self.netchecker_config_path))
        # setdefault keeps the injected field attached to the body even if the template has no spec
        netchecker_data.setdefault('spec', {})['unsupportedSpec'] = 'testvalue'
        show_step(2)
        netchecker = self.infraconnectivitymonitors.create(body=netchecker_data, namespace=self.cluster.namespace)
        netchecker_name = netchecker.name
        self.netchecker.wait_infraconnectivitymonitor_status(netchecker)
        netchecker = self.infraconnectivitymonitors.get(name=netchecker_name, namespace=self.cluster.namespace)
        # The field was injected under 'spec', so it has to be looked up there and not at the top level
        actual_spec = netchecker.data.get('spec') or {}
        assert 'unsupportedSpec' not in actual_spec, (f"Unsupported spec should not exist in netchecker "
                                                      f"object. Current spec:\n"
                                                      f"{yaml.dump(actual_spec)}")


@pytest.mark.usefixtures('netchecker_cleanup_actions')
class TestNetcheckerNegativeTemplates(NetcheckerNegativeBase):
    """Class for NetChecker negative tests related to l2 templates"""

    @pytest.fixture(scope='function')
    def restore_template(self, l2t):
        """Restore the spec of ``l2t`` after the test and wait until the cluster is healthy again.

        Before the test the template data, its spec and its ``status.state`` are remembered.
        After the test the spec is patched back, the saved state is awaited, IpamHosts whose
        ``status.l2TemplateRef`` contains the template uid are allowed to apply a pending
        netconfig change, and the cluster health checks are run.
        """
        # Snapshot taken before the test body runs
        l2t_data = l2t.data
        initial_specs = l2t_data.get('spec', {})
        initial_state = l2t.data.get('status', {}).get('state', '')
        yield
        LOG.info("Restore l2template and wait status OK")
        l2t.patch({'spec': initial_specs})
        # It is better to wait at least one minute after patch to be sure ipamhosts are reconciled
        time.sleep(60)
        waiters.wait(lambda: l2t.data.get('status', {}).get('state', '') == initial_state)
        LOG.info(f"l2 template {l2t.name} restored to initial state")
        all_ipam_hosts = self.ns.get_ipam_hosts()
        l2t_uid = l2t_data.get('metadata', {}).get('uid', '')
        # Hosts are matched by a substring check of the template uid in status.l2TemplateRef
        ipam_hosts_with_given_l2t = [host for host in all_ipam_hosts if l2t_uid in
                                     host.data.get('status', {}).get('l2TemplateRef', '')]
        hosts_to_apply_config = []
        for host in ipam_hosts_with_given_l2t:
            try:
                # IpamHost reconcile may take up to 1 min. Lets wait 90 sec to be sure
                waiters.wait(lambda: host.data.get('status', {}).get('messages', []), timeout=90, interval=15)
                messages = host.data.get('status', {}).get('messages', [])
                LOG.info(f"Messages from ipamhost:\n{yaml.dump(messages)}")
                for message in messages:
                    # Either message is treated as "a netconfig change is pending on this host"
                    if "NetconfigCandidate are different" in message or "NetconfigFiles is older" in message:
                        hosts_to_apply_config.append(host.name)
                        LOG.info(f"Changes require applying on host {host.name}. Applying changes")
                        host.patch({'spec': {'netconfigUpdateAllow': True}})
                        break
            except timeout_exceptions.TimeoutError:
                # No status messages appeared within the timeout: nothing to apply on this host
                LOG.info(f"No messages in ipamhost {host.name} about netconfig. No applying required.")
        if hosts_to_apply_config:
            LOG.info(f"Config was changed on next machines:\n{yaml.dump(hosts_to_apply_config)}\nWaiting for hosts "
                     f"started to apply config")
            self.cluster.check.check_any_machine_exp_status(expected_status='Prepare')
        # It can take about 15 min for every node to be redeployed. Use timeout depending on hosts number
        # NOTE(review): the timeout becomes 0 when no IpamHost matches the template uid — confirm this is acceptable
        check_cluster_nodes_timeout = 15 * 60 * len(ipam_hosts_with_given_l2t)
        self.perform_cluster_health_checks(self.cluster, check_cluster_nodes_timeout=check_cluster_nodes_timeout)

    @pytest.mark.usefixtures('prepare_fake_objects')
    def test_l2template_error(self, l2t, restore_template, show_step):
        """Validate netchecker object when l2 template has errors.

        Scenario:
            1. Check cluster readiness
            2. Modify l2 template to have error in status
            3. Create netchecker
            4. Check that nodes that are using this template with error are excluded from monitoring
            5. Check for correct message about l2t error in netchecker object
        """

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        all_cluster_machines = self.cluster.get_machines()
        all_machines_names = {m.name for m in all_cluster_machines}
        l2t_name = l2t.name
        machines_names_with_given_l2t = {m.name for m in all_cluster_machines if m.l2template_name == l2t_name}
        expected_inventoryconfig_status = 'ok'
        expected_targetsconfig_status = 'ok'
        if all_machines_names == machines_names_with_given_l2t:
            LOG.info(f"All of machines are affected by given l2 template {l2t_name}. In this case we don't expect "
                     f"netchecker status will be ok.")
            expected_inventoryconfig_status = 'Inventory config has 0 nodes'
            expected_targetsconfig_status = 'Targets config has 0 nodes'

        show_step(2)
        # A plain string instead of a netplan template puts the l2template into the ERR state
        l2t.patch({'spec': {'npTemplate': 'empty'}})
        waiters.wait(lambda: l2t.data.get('status', {}).get('state', '') == 'ERR')
        show_step(3)
        n_obj = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(
            n_obj,
            expected_inventoryconfig_status=expected_inventoryconfig_status,
            expected_targetsconfig_status=expected_targetsconfig_status,
            overall_status_only=True)
        # Resolve the k8s node name of every machine once and derive both sets from it
        k8s_node_by_machine = {m.name: m.get_k8s_node_name() for m in all_cluster_machines}
        k8s_nodes_with_given_l2t = {k8s_node_by_machine[name] for name in machines_names_with_given_l2t}
        expected_monitoring_nodes = set(k8s_node_by_machine.values()) - k8s_nodes_with_given_l2t

        show_step(4)
        waiters.wait(lambda: self._check_expected_nodes_monitored(expected_nodes=expected_monitoring_nodes,
                                                                  netchecker_object=n_obj),
                     timeout=300, interval=30)
        # Read the status only after the wait above, so the messages are checked against the
        # state that passed the node check and not against a snapshot taken before it
        inventory_status_nodes = n_obj.data.get('status', {}).get('inventoryConfigStatus', {}).get('nodes', [])
        show_step(5)
        # First status message reported for each affected machine, or "" when there is none
        machine_message_map = {
            machine: next((node.get('status', '') for node in inventory_status_nodes
                           if node.get('machineName', '') == machine), "")
            for machine in machines_names_with_given_l2t
        }
        err_msg = (f"Some machines don't contain message about l2 template error.\n"
                   f"All machines messages:\n{yaml.dump(machine_message_map)}")
        LOG.info(f"Messages for machines:\n{yaml.dump(machine_message_map)}")
        assert all(f"{l2t_name} has errors" in msg for msg in machine_message_map.values()), err_msg

    def test_fix_l2template(self, l2t, restore_template, show_step):
        """Validate netchecker object after fixing l2 template with error.

        Scenario:
            1. Check cluster readiness
            2. Modify l2 template to have error in status
            3. Create netchecker
            4. Restore l2 template to have no errors
            5. Check that all nodes are monitored by netchecker after fixing l2 template
            6. Check cluster readiness after test
        """

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        # Spec to put back in step 4
        init_spec = l2t.data.get('spec', {})
        all_machines = self.cluster.get_machines()
        all_machines_names = {m.name for m in all_machines}
        l2t_name = l2t.name
        machines_names_with_given_l2t = {m.name for m in all_machines if m.l2template_name == l2t_name}
        expected_inventoryconfig_status = 'ok'
        expected_targetsconfig_status = 'ok'
        if all_machines_names == machines_names_with_given_l2t:
            LOG.info(f"All of machines are affected by given l2 template \'{l2t_name}\'. In this case we don't expect "
                     f"netchecker status will be ok.")
            expected_inventoryconfig_status = 'Inventory config has 0 nodes'
            expected_targetsconfig_status = 'Targets config has 0 nodes'
        all_k8s_nodes_names = {m.get_k8s_node_name() for m in all_machines}
        show_step(2)
        # A plain string instead of a netplan template puts the l2template into the ERR state
        l2t.patch({'spec': {'npTemplate': 'empty'}})
        waiters.wait(lambda: l2t.data.get('status', {}).get('state', '') == 'ERR')
        show_step(3)
        n_obj = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(
            n_obj,
            expected_inventoryconfig_status=expected_inventoryconfig_status,
            expected_targetsconfig_status=expected_targetsconfig_status,
            overall_status_only=True)
        # Step 4 is listed in the scenario and has to be announced like every other step
        show_step(4)
        LOG.info(f"Fix l2 template {l2t_name}")
        l2t.patch({'spec': init_spec})
        waiters.wait(lambda: l2t.data.get('status', {}).get('state', '') == 'OK')
        self.netchecker.wait_infraconnectivitymonitor_status(n_obj)
        show_step(5)

        LOG.info("Waiting for all machines are monitored by netchecker after fixing l2 template")
        waiters.wait(lambda: self._check_expected_nodes_monitored(expected_nodes=all_k8s_nodes_names,
                                                                  netchecker_object=n_obj),
                     timeout=600, interval=20)
        show_step(6)
        self.perform_cluster_health_checks(self.cluster)

    @pytest.mark.usefixtures('prepare_fake_objects')
    def test_modify_l2template(self, l2t, restore_template, show_step):
        """Validate netchecker object after adding new data to used l2 template.

        Scenario:
            1. Check cluster readiness
            2. Create netchecker
            3. Add new data (subnet, ipaddr) to used l2 template
            4. Apply l2 template changes on target nodes
            5. Check that new data also handled by netchecker
            6. Check cluster readiness after test
        """

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        show_step(2)
        n_obj = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(n_obj)
        # Modify template to use our fake subnet in l3layout and check netchecker
        l2t_l3layout = l2t.data.get('spec', {}).get('l3Layout', [])
        netplan = l2t.data.get('spec', {}).get('npTemplate', '')
        show_step(3)
        # Extra l3Layout entry that selects the fake subnet by its label
        l2t_l3layout.extend([
            {
                'labelSelector': {
                    'kaas.mirantis.com/provider': 'baremetal',
                    'fake-subnet-label': 'present'
                },
                'scope': 'namespace',
                'subnetName': f'{self.fake_subnet_name}'
            }
        ])
        # Netplan snippets that request an address from the fake subnet; only one of them is used below
        vlan_to_add = """
si-fake-vlan:
  id: 999
  link: bond0
  addresses:
    - {{ ip "si-fake-vlan:si-test-fake-subnet" }}
"""
        eth_to_add = """
si-fake-eth:
  dhcp4: false
  addresses:
    - {{ ip "si-fake-eth:si-test-fake-subnet" }}
  set-name: si-fake-eth
"""
        # Substring check on the template text: add a vlan when a 'vlans' section is present, else an ethernet
        if 'vlans' in netplan:
            result = utils.add_section_to_str_np_template(netplan, net_block=vlan_to_add, net_type="vlans")
        else:
            result = utils.add_section_to_str_np_template(netplan, net_block=eth_to_add, net_type="ethernets")
        l2t.patch({'spec': {'l3Layout': l2t_l3layout, 'npTemplate': result}})
        waiters.wait(lambda: l2t.data.get('status', {}).get('state', '') == 'OK')
        all_ipam_hosts = self.ns.get_ipam_hosts()
        l2t_uid = l2t.data.get('metadata', {}).get('uid', '')
        # Hosts are matched by a substring check of the template uid in status.l2TemplateRef
        ipam_hosts_with_given_l2t = [host for host in all_ipam_hosts if l2t_uid in
                                     host.data.get('status', {}).get('l2TemplateRef', '')]
        show_step(4)
        LOG.info("Apply new netplan on nodes")
        for host in ipam_hosts_with_given_l2t:
            # IpamHost reconcile may take up to 1 min. Let's wait 2 min to be sure
            waiters.wait(lambda: host.data.get('status', {}).get('messages', []), timeout=120, interval=15)
            host.patch({'spec': {'netconfigUpdateAllow': True}})
        self.cluster.check.check_any_machine_exp_status(expected_status='Prepare')
        LOG.info("Waiting for new l2template is applied on machines")
        # It can take about 15 min for every node to be redeployed. Use timeout depending on hosts number
        check_cluster_nodes_timeout = 15 * 60 * len(ipam_hosts_with_given_l2t)
        self.perform_cluster_health_checks(self.cluster, check_cluster_nodes_timeout=check_cluster_nodes_timeout)
        inventory_conf = self.netchecker.get_inventory_config(name=self.netchecker.inventory_config_name)
        # The inventory entry is expected to carry "<namespace>/<subnet>" as both its name and its tag
        subnet_name = subnet_tag = f"{self.cluster.namespace}/{self.fake_subnet_name}"
        subnets_data = inventory_conf.data.get('spec', {}).get('subnetsConfig', [])
        err_msg = (f"Added new subnet {self.fake_subnet_name} to l2template {l2t.name} is not monitored by netchecker "
                   f"{n_obj.name} and not added into checker inventory config. Checker inventory data:\n"
                   f"{yaml.dump(inventory_conf.data.get('spec', {}))}")
        show_step(5)
        assert [subnet for subnet in subnets_data if subnet.get('name', '') == subnet_name and
                subnet.get('tag', '') == subnet_tag], err_msg
        show_step(6)
        self.perform_cluster_health_checks(self.cluster)

    @pytest.mark.usefixtures('prepare_fake_objects')
    def test_l2template_not_used(self, show_step):
        """Check that only used l2 templates are handled by netchecker

        Scenario:
            1. Fake objects are prepared by fixture 'prepare_fake_objects'
            2. Check cluster readiness
            3. Create netchecker
            4. Check that no information about fake objects is existed in netchecker inventory
        """

        show_step(2)
        self.perform_cluster_health_checks(self.cluster)
        unused_l2t = self.fake_l2template_name
        unused_subnet = self.fake_subnet_name
        namespace = self.cluster.namespace
        config_name = self.inventory_config_name

        show_step(3)
        monitor = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(monitor)

        show_step(4)
        waiters.wait(lambda: self.netchecker.get_inventory_config(name=config_name))
        inventory = self.netchecker.get_inventory_config(name=config_name)
        # An inventory entry for the fake subnet would carry this value as both name and tag
        fake_ref = f"{namespace}/{unused_subnet}"
        configured_subnets = inventory.data.get('spec', {}).get('subnetsConfig', [])
        err_msg = (f"Subnet {unused_subnet} should not be monitored by netchecker "
                   f"{monitor.name} because l2template {unused_l2t} is not used. Checker inventory data:\n"
                   f"{yaml.dump(inventory.data.get('spec', {}))}")

        fake_subnet_listed = any(
            entry.get('name', '') == fake_ref and entry.get('tag', '') == fake_ref
            for entry in configured_subnets
        )
        assert not fake_subnet_listed, err_msg


@pytest.mark.usefixtures('netchecker_cleanup_actions')
class TestNetcheckerNegativeConfigs(NetcheckerNegativeBase):
    """Class for NetChecker negative tests related to inventory and targets configs"""

    def test_modify_inventory_config(self, show_step):
        """Check that manual changes in checkerinventory config are overwritten by controller

        Fake node and subnet entries are appended to the spec of the inventory config and the
        test waits until the spec is equal (order-insensitive) to the one read before patching.

        Scenario:
            1. Check cluster readiness
            2. Create netchecker
            3. Modify checkerinventoryconfig directly
            4. Make sure data is restored by controller
            5. Check cluster readiness after test
        """

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        inventory_cfg_name = self.netchecker.inventory_config_name
        fake_node_config = {'expectedSubnetTags': ['default/fake-subnet-name'],
                            'nodeName': 'fake-node-name'}
        fake_subnet_config = {'name': 'default/fake-subnet-name',
                              'ranges': ['10.0.0.200-10.0.0.250'],
                              'tag': 'default/fake-subnet-name'}
        show_step(2)
        n_obj = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(n_obj)
        waiters.wait(lambda: self.netchecker.get_inventory_config(name=inventory_cfg_name))
        inventory_config = self.netchecker.get_inventory_config(name=inventory_cfg_name)
        inventory_config_spec = inventory_config.data.get('spec', {})
        show_step(3)
        # Keep an untouched copy, because the spec is modified in place below
        initial_specs = copy.deepcopy(inventory_config_spec)
        # setdefault() instead of get(): with get() the fake entry would be appended to a throw-away
        # list (and never reach the patch) when the section is missing in the current spec
        inventory_config_spec.setdefault('nodesConfig', []).append(fake_node_config)
        inventory_config_spec.setdefault('subnetsConfig', []).append(fake_subnet_config)
        LOG.info("Add fake data into inventoryconfig")
        inventory_config.patch({'spec': inventory_config_spec})
        waiters.wait(lambda: inventory_config.data.get('status', {}).get('overallStatus', '') == 'ok')
        LOG.info("Check data is removed by controller")

        def check_specs_are_restored(expected_specs):
            """Return True when the current spec has no differences with the expected one"""
            config = self.netchecker.get_inventory_config(name=inventory_cfg_name)
            specs = config.data.get('spec', {})
            diff = DeepDiff(specs, expected_specs, ignore_order=True)
            if diff:
                LOG.warning(f"Found difference between actual and expected specs:\n{diff}")
                return False
            return True

        # NOTE: the message is built before waiting, so 'Current specs' shows the state at this
        # moment, not at the moment of timeout. Up-to-date differences are logged on every poll.
        msg = (
            f"Manual inventory config changes should be overwritten by controller, but wasn't. Expected specs:\n"
            f"{yaml.dump(initial_specs)}\nCurrent specs:\n"
            f"{yaml.dump(self.netchecker.get_inventory_config(name=inventory_cfg_name).data.get('spec', {}))}"
        )
        show_step(4)
        waiters.wait(lambda: check_specs_are_restored(expected_specs=initial_specs), timeout_msg=msg, timeout=300)
        show_step(5)
        self.perform_cluster_health_checks(self.cluster)

    def test_modify_targets_config(self, show_step):
        """Check that manual changes in netcheckertargets config are overwritten by controller

        A fake node entry is appended to 'nodesConfig' of the targets config and the test waits
        until the spec is equal (order-insensitive) to the one read before patching.

        Scenario:
            1. Check cluster readiness
            2. Create netchecker
            3. Modify netcheckertargets config directly
            4. Make sure data is restored by controller
            5. Check cluster readiness after test
        """

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        targets_cfg_name = self.netchecker.targets_config_name
        show_step(2)
        n_obj = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(n_obj)
        waiters.wait(lambda: self.netchecker.get_targets_config(name=targets_cfg_name))
        show_step(3)
        targets_config = self.netchecker.get_targets_config(name=targets_cfg_name)
        targets_config_spec = targets_config.data.get('spec', {})
        # Keep an untouched copy, because the spec is modified in place below
        initial_specs = copy.deepcopy(targets_config_spec)

        fake_targets_config = {
            'nodeName': 'si-test-fake-node',
            'targets': [{
                'nodeName': 'si-test-fake-target',
                # Tag has the '<namespace>/<subnet name>' form, without extra quotes around the name
                'subnetTags': [f"{self.cluster.namespace}/{self.fake_subnet_name}"]
            }]
        }
        # setdefault() instead of get(): with get() the fake entry would be appended to a throw-away
        # list (and never reach the patch) when 'nodesConfig' is missing in the current spec
        targets_config_spec.setdefault('nodesConfig', []).append(fake_targets_config)
        LOG.info("Add fake data into targets config")
        targets_config.patch({'spec': targets_config_spec})
        waiters.wait(lambda: not targets_config.data.get('status', {}).get('error', ''))
        LOG.info("Check data is removed by controller")

        def check_specs_are_restored(expected_specs):
            """Return True when the current spec has no differences with the expected one"""
            config = self.netchecker.get_targets_config(name=targets_cfg_name)
            specs = config.data.get('spec', {})
            diff = DeepDiff(specs, expected_specs, ignore_order=True)
            if diff:
                LOG.warning(f"Found difference between actual and expected specs:\n{diff}")
                return False
            return True

        # NOTE: the message is built before waiting, so 'Current specs' shows the state at this
        # moment, not at the moment of timeout. Up-to-date differences are logged on every poll.
        msg = (
            f"Manual targets config changes should be overwritten by controller, but wasn't. Expected specs:\n"
            f"{yaml.dump(initial_specs)}\nCurrent specs:\n"
            f"{yaml.dump(self.netchecker.get_targets_config(name=targets_cfg_name).data.get('spec', {}))}"
        )
        show_step(4)
        waiters.wait(lambda: check_specs_are_restored(expected_specs=initial_specs), timeout_msg=msg)
        show_step(5)
        self.perform_cluster_health_checks(self.cluster)

    def test_delete_inventory_config(self, show_step):
        """Verify that a manually deleted checkerinventory config is created again by controller

        Recreation is detected by comparing the creation timestamp read before the deletion with
        the timestamp of the first checkerinventory config listed in the netchecker namespace.

        Scenario:
            1. Check cluster readiness
            2. Create netchecker
            3. Delete checkerinventory config directly
            4. Make sure config restored by controller
        """

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        inventory_cfg_name = self.netchecker.inventory_config_name

        show_step(2)
        checker = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(checker)
        waiters.wait(lambda: self.netchecker.get_inventory_config(name=inventory_cfg_name))
        config_to_delete = self.netchecker.get_inventory_config(name=inventory_cfg_name)
        timestamp_before = config_to_delete.data.get('metadata', {}).get('creation_timestamp', '')

        show_step(3)
        LOG.info(f"Removing checkerinventory config {inventory_cfg_name}")
        # Sleep a couple of seconds so the recreated object gets a different timestamp:
        # sometimes all the actions take less than one second
        time.sleep(2)
        config_to_delete.delete()
        LOG.info("Waiting new checkerinventory config is created by controller")

        def check_inventory_timestamps_differs(old_timestamp):
            """Return True when a listed config exists and its creation timestamp is not the old one"""
            found = self.netchecker.checkerinventoryconfigs.list(namespace=self.netchecker_namespace)
            if not found:
                return False
            first_config = found[0]
            try:
                new_timestamp = first_config.data.get('metadata', {}).get('creation_timestamp', '')
                return old_timestamp != new_timestamp
            except Exception as e:
                LOG.warning(f"Failed to get object data:\n{e}")
                return False

        err_msg = "Checkerinventory config was not recreated after deletion"
        show_step(4)
        waiters.wait(lambda: check_inventory_timestamps_differs(old_timestamp=timestamp_before),
                     timeout_msg=err_msg, timeout=300, interval=30)

    def test_delete_targets_config(self, show_step):
        """Check that after manual deletion of netcheckertargets config it is restored by controller

        Recreation is detected by comparing the creation timestamp read before the deletion with
        the timestamp of the first netcheckertargets config listed in the netchecker namespace.

        Scenario:
            1. Check cluster readiness
            2. Create netchecker
            3. Delete netcheckertargets config directly
            4. Make sure config restored by controller
        """

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        targets_cfg_name = self.netchecker.targets_config_name
        show_step(2)
        n_obj = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(n_obj)
        waiters.wait(lambda: self.netchecker.get_targets_config(name=targets_cfg_name))
        targets_config = self.netchecker.get_targets_config(name=targets_cfg_name)
        creation_timestamp = targets_config.data.get('metadata', {}).get('creation_timestamp', '')
        show_step(3)
        LOG.info(f"Removing netchecker targets config {targets_cfg_name}")
        # Wait at least a few seconds to get a different timestamp. Sometimes all actions take less than 1 sec
        time.sleep(2)
        targets_config.delete()
        LOG.info("Waiting new netchecker targets config is created by controller")

        def check_targets_timestamps_differs(old_timestamp):
            """Return True when a listed config exists and its creation timestamp is not the old one"""
            _targets_configs = self.netchecker.netcheckertargetsconfigs.list(namespace=self.netchecker_namespace)
            if not _targets_configs:
                return False
            try:
                timestamp = _targets_configs[0].data.get('metadata', {}).get('creation_timestamp', '')
                return old_timestamp != timestamp
            except Exception as e:
                LOG.warning(f"Failed to get object data:\n{e}")
                return False

        show_step(4)
        err_msg = "Netchecker targets config was not recreated after deletion"
        # The predicate reports failure by returning False (it never raises), so it has to be polled
        # with wait(), same as in test_delete_inventory_config. wait_pass() presumably returns as soon
        # as the predicate does not raise, which would make this check pass right away.
        waiters.wait(lambda: check_targets_timestamps_differs(old_timestamp=creation_timestamp),
                     timeout_msg=err_msg, timeout=300, interval=30)


@pytest.mark.usefixtures('netchecker_cleanup_actions')
class TestNetcheckerNetworkConfigs(NetcheckerNegativeBase):
    """Class for NetChecker negative tests related to correct metrics collection during network outage"""

    @pytest.fixture(scope='function')
    def test_machines(self):
        """Provide the list of machines to run network tests on

        For a management cluster it is the first machine returned by get_machines(); otherwise
        it is the first 'worker' machine followed by the first 'control' machine.
        """
        if not self.cluster.is_management:
            first_worker = self.cluster.get_machines(machine_type="worker")[0]
            first_control = self.cluster.get_machines(machine_type="control")[0]
            return [first_worker, first_control]
        return [self.cluster.get_machines()[0]]

    @pytest.fixture(scope='function')
    def restore_network_on_node(self, test_machines):
        """Bring interfaces of test machines back to the link state they had before the test

        Before the test the link state of every interface found in netplan addresses is recorded for
        each machine from 'test_machines'. After the test the states are collected again and every
        interface whose state differs from the recorded one is switched back with 'ip link set'.
        """

        def collect_states(machines):
            """Return {machine name: {interface name: lower-cased link state}} parsed from 'ip link'"""
            ifaces_data = {}
            for machine in machines:
                LOG.info(f"Collecting interfaces data for machine {machine.name}")
                addresses = machine.get_machine_ipaddresses_from_netplan()
                ifaces_for_grep = " ".join(addresses.keys())
                cmd = f'for i in {ifaces_for_grep}; do ip link | grep $i; done'
                iface_data = machine.run_cmd(cmd).stdout_str
                for line in iface_data.strip().splitlines():
                    state_match = re.search(r'state (\w+)', line)
                    if not state_match:
                        continue
                    # Line looks like '<index>: <name>[@<parent>]: <flags> ... state <STATE> ...'
                    iface_name = line.split(':')[1].split('@')[0].strip()
                    # Keyed by interface name, so a line matched by several grep patterns is stored once
                    ifaces_data.setdefault(machine.name, {})[iface_name] = state_match.group(1).lower()
            return ifaces_data

        LOG.info("Collecting interfaces data before test")
        ifaces_data_before_test = collect_states(test_machines)
        yield
        LOG.info("Collecting interfaces data after test")
        ifaces_data_after_test = collect_states(test_machines)

        # Compare states per interface name; interfaces missing after the test are left alone
        changed = [
            (machine_name, iface, old_state)
            for machine_name, ifaces in ifaces_data_before_test.items()
            for iface, old_state in ifaces.items()
            if ifaces_data_after_test.get(machine_name, {}).get(iface, old_state) != old_state
        ]
        if not changed:
            LOG.info("Interfaces states were not changed during test. Nothing to restore")
            return

        LOG.info("Restore ifaces states to initial")
        for machine_name, iface, old_state in changed:
            # 'ip link set' accepts only 'up' or 'down', while the recorded state may be another
            # value (e.g. 'unknown'); everything that was not 'down' before the test is brought up
            target_state = 'down' if old_state == 'down' else 'up'
            LOG.info(f"Restoring interface '{iface}' on machine {machine_name} "
                     f"to state '{target_state}' (state before test: '{old_state}')")
            self.cluster.get_machine(machine_name).run_cmd(f"sudo ip link set dev {iface} {target_state}")

    def select_target_iface_for_disabling(self, machine):
        """Pick an interface of the machine that can be disabled for testing

        All IPAM subnets of the namespace are examined, except subnets labeled with
        'ipam/SVC-k8s-lcm' or 'ipam/SVC-LBhost' and subnets with 'fake-vsrx' in the name. A subnet
        is suitable when its labels include the 'labelSelector' of some 'l3Layout' entry of the
        machine l2template and one of its allocated IPs is set on the machine according to netplan.
        The test is skipped (pytest.skip) when none of the subnets is suitable.

        :param machine: machine object to select the interface on
        :return: tuple (interface name, IP address without prefix length)
        """
        target_l2t = machine.l2template_name
        l2t_obj = self.ns.get_l2template(target_l2t)
        subnets_layout = l2t_obj.data.get('spec', {}).get('l3Layout', []) or []
        all_subnets = self.ns.get_ipam_subnets()
        skip_msg = "No appropriate subnet for testing found. Skipping check"
        # Fetched from the machine on first need only and reused for all subnets
        addr_from_netplan = None

        for subnet in all_subnets:
            # 'or {}' covers objects where labels are present but set to None
            labels = subnet.data.get('metadata', {}).get('labels', {}) or {}

            if (
                    'ipam/SVC-k8s-lcm' in labels or
                    'ipam/SVC-LBhost' in labels or
                    'fake-vsrx' in subnet.name
            ):
                LOG.info(f"Skipping subnet {subnet.name} because it may break cluster stability")
                continue

            for layout in subnets_layout:
                selector = layout.get('labelSelector', {}) or {}
                # Subnet matches the layout when every selector label is present in subnet labels
                if not selector.items() <= labels.items():
                    continue

                LOG.info(f"Selecting subnet {subnet.name} for testing")
                allocated_ips = subnet.data.get('status', {}).get('allocatedIPs', []) or []
                # Only the part before the first ':' of every entry is compared with machine addresses
                allocated_ips = {ip.split(':')[0] for ip in allocated_ips}

                if not allocated_ips:
                    LOG.warning(f"No IPs allocated from subnet {subnet.name}. Selecting another subnet")
                    continue

                if addr_from_netplan is None:
                    addr_from_netplan = machine.get_machine_ipaddresses_from_netplan()
                for iface, addr in addr_from_netplan.items():
                    ip = addr.split('/')[0]
                    if ip in allocated_ips:
                        return iface, ip

        # Skip only after ALL subnets were examined, not after the first one that did not fit
        LOG.debug(f"All subnets: {all_subnets}")
        pytest.skip(skip_msg)

    def get_metrics_for_target(self, target_node_name, target_ip, query):
        """Run the prometheus query and keep only the metrics that point to the given target

        A metric is kept when 'target_node_name' is contained in its 'dst' label and its
        'target_ip_address' label is equal to 'target_ip'.
        """
        selected = []
        for sample in self.prometheusclient.get_query(query=query):
            labels = sample.get('metric', {})
            if target_node_name not in labels.get('dst', ''):
                continue
            if labels.get('target_ip_address', '') == target_ip:
                selected.append(sample)
        return selected

    def check_expected_metrics_number(self, expected_metrics_number, target_node_name, target_ip, query):
        """Tell whether the number of metrics found for the target is equal to the expected one

        Metrics are obtained with get_metrics_for_target() for the given node name, IP and query.
        The result is logged: info on match, warning with both numbers on mismatch.

        :return: True when the numbers are equal, False otherwise
        """
        target_metrics = self.get_metrics_for_target(target_node_name=target_node_name,
                                                     target_ip=target_ip, query=query)
        current_metrics_number = len(target_metrics)
        if current_metrics_number == expected_metrics_number:
            LOG.info(f"Metrics number as expected for target node {target_node_name}: {current_metrics_number}")
            return True
        LOG.warning(f"Current metrics number for target node {target_node_name} is not as expected. Should be "
                    f"{expected_metrics_number} but currently is {current_metrics_number}")
        return False

    def test_disable_net_check_metrics(self, show_step):
        """Check that after network down we have failures in cnnc metrics
        NOTE: Do not disable SVC-k8s-lcm network for testing, because node becomes unreachable

        The 'cnnc_failure' counters reported for the target node/IP are collected per source node
        before and after a temporary interface outage; every counter is expected to grow.

        Scenario:
            1. Check cluster readiness
            2. Select target node(s)
            3. Create netchecker
            4. Select interface for disabling
            5. Collect metrics before disabling interface
            6. Disable iface on target node
            7. Wait some time for counters are changed
            8. Enable interface
            9. Wait cluster ready after network outage
            10. Compare metrics
        """

        if self.cluster.is_management:
            pytest.skip("Mgmt cluster has not enough networks to be able to disable one")

        wait_time_for_counters_changed = 120
        query = 'cnnc_failure'

        def collect_error_counters(metrics):
            """Map source node -> timestamp and error counter taken from the metric 'value' pair"""
            stats = {}
            for metric in metrics:
                source_node = metric.get('metric', {}).get('source_node', '')
                values = metric.get('value', [])
                stats[source_node] = {'timestamp': values[0], 'errors_counter': int(values[1])}
            return stats

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)

        machines = self.cluster.get_machines()
        machine = machines[0]
        total_machine_number = len(machines)
        k8s_node_name = machine.get_k8s_node_name()
        show_step(2)
        LOG.info(f"Selected machine for testing: {machine.name}")
        show_step(3)
        n_obj = self.get_or_create_netchecker_object()
        self.netchecker.wait_infraconnectivitymonitor_status(n_obj)
        show_step(4)
        iface_to_disable, ip = self.select_target_iface_for_disabling(machine)
        show_step(5)
        # Check that we have correct number of metrics. If we have N number of nodes, then it must be N-1 metrics number
        # because metric must be for every node except target
        expected_metrics_number = total_machine_number - 1
        LOG.info("Waiting for correct metrics number")
        waiters.wait(lambda: self.check_expected_metrics_number(expected_metrics_number=expected_metrics_number,
                                                                target_node_name=k8s_node_name, target_ip=ip,
                                                                query=query), timeout=300, interval=30)
        target_node_metrics = self.get_metrics_for_target(
            target_node_name=k8s_node_name, target_ip=ip, query=query)
        LOG.info("Check expected data exists in metrics")
        error_nodes = []
        for metric in target_node_metrics:
            if not metric.get('value', []):
                source_node = metric.get('metric', {}).get('source_node', '')
                LOG.error(f"Something wrong with stats for source node {source_node}")
                error_nodes.append(metric)
        assert not error_nodes, (f"Next metrics don't have any values to check. Should have timestamps and "
                                 f"counters\n{yaml.dump(error_nodes)}")
        LOG.info("Collect stats before disabling interface")
        stats_before = collect_error_counters(target_node_metrics)
        LOG.info(f"Next data collected:\n{yaml.dump(stats_before)}")

        show_step(6)
        LOG.info(f"Disabling interface '{iface_to_disable}' with ip '{ip}' on node {k8s_node_name}")
        try:
            machine.run_cmd(f"sudo ip link set dev {iface_to_disable} down")
            LOG.info("Waiting few minutes for counters are changed")
            show_step(7)
            time.sleep(wait_time_for_counters_changed)
        finally:
            # Enable the interface even if something above fails: this test does not use a
            # restoring fixture, so otherwise the node would be left with a disabled interface
            show_step(8)
            LOG.info(f"Enabling interface {iface_to_disable}")
            machine.run_cmd(f"sudo ip link set dev {iface_to_disable} up")
        show_step(9)
        LOG.info("Wait for cluster restored after network outage")
        self.perform_cluster_health_checks(self.cluster)
        show_step(10)
        LOG.info("Collect data one more time after interface restored")
        target_node_metrics = self.get_metrics_for_target(
            target_node_name=k8s_node_name, target_ip=ip, query=query)
        stats_after = collect_error_counters(target_node_metrics)
        LOG.info(f"Next data collected for current moment:\n{yaml.dump(stats_after)}")
        LOG.info("Compare results")
        not_processed = []
        for node, before in stats_before.items():
            first_count = before.get('errors_counter', 0)
            # A source node missing after the outage counts as 0 and is reported as not changed
            second_count = stats_after.get(node, {}).get('errors_counter', 0)
            if second_count <= first_count:
                not_processed.append({'node': node,
                                      'counter_before_test': first_count,
                                      'counter_after_test': second_count})
        assert not not_processed, f"Some counters are not changed\n{yaml.dump(not_processed)}"

    def test_disable_net_create_netchecker(self, show_step, test_machines, restore_network_on_node):
        """Check that after network down we can still create netchecker and it should be ok
        NOTE: Do not disable SVC-k8s-lcm network for testing, because node becomes unreachable

        One interface is disabled on every machine from 'test_machines'. The test enables them back
        itself at step 5; 'restore_network_on_node' fixture is requested to restore interface states
        after the test in any case.

        Scenario:
            1. Check cluster readiness
            2. Select interface for disabling
            3. Disable interface
            4. Create netchecker and wait for status ok
            5. Enable interface
            6. Wait for cluster readiness
        """

        if self.cluster.is_management:
            pytest.skip("Mgmt cluster has not enough networks to be able to disable one")

        show_step(1)
        self.perform_cluster_health_checks(self.cluster)
        # machine name -> interface disabled on this machine
        disabled_ifaces = {}
        for machine in test_machines:
            LOG.banner(f"Machine {machine.name}")
            show_step(2)
            iface_to_disable, ip = self.select_target_iface_for_disabling(machine)
            disabled_ifaces[machine.name] = iface_to_disable
            show_step(3)
            LOG.info(f"Disabling interface '{iface_to_disable}' with ip '{ip}' on node "
                     f"{machine.get_k8s_node_name()}")
            machine.run_cmd(f"sudo ip link set dev {iface_to_disable} down")
        show_step(4)
        n_obj = self.get_or_create_netchecker_object()
        try:
            self.netchecker.wait_infraconnectivitymonitor_status(n_obj)
        except timeout_exceptions.TimeoutError as e:
            # Turn the timeout into a test failure that carries current netchecker status
            netchecker_states = n_obj.data.get('status', {})
            err_msg = (f"Netchecker should be created succesfully, when some interfaces are down but wasn't. "
                       f"Current states:\n{yaml.dump(netchecker_states)}")
            raise AssertionError(err_msg) from e

        for machine in test_machines:
            iface_to_enable = disabled_ifaces[machine.name]
            show_step(5)
            LOG.info(f"Enabling interface {iface_to_enable} on machine {machine.name}")
            machine.run_cmd(f"sudo ip link set dev {iface_to_enable} up")
        show_step(6)
        self.perform_cluster_health_checks(self.cluster)


@pytest.mark.skip(reason='Will be implemented in HA tests')
class TestNetcheckerAgentConditions(NetcheckerNegativeBase):
    """Placeholders for NetChecker tests around cnnc agents

    The whole class is skipped and none of the tests has a body yet.
    """

    def test_stop_cnnc_agent_create_netchecker(self):
        """Not implemented yet"""

    def test_create_netchecker_delete_cnnc_agent(self):
        """Not implemented yet"""

    def test_recreate_cnnc_agent_check_metrics(self):
        """Not implemented yet"""

    def test_stop_cnnc_inventory_agent_create_netchecker(self):
        """Not implemented yet"""

    def test_create_netchecker_delete_cnnc_inventory_agent(self):
        """Not implemented yet"""

    def test_recreate_cnnc_inventory_agent_check_invebtory(self):
        """Not implemented yet"""
