Merge "Optimize checking SSH connectivity PROD-37096"
diff --git a/tcp_tests/managers/envmanager_heat.py b/tcp_tests/managers/envmanager_heat.py
index 95c1cb2..d26d06d 100644
--- a/tcp_tests/managers/envmanager_heat.py
+++ b/tcp_tests/managers/envmanager_heat.py
@@ -15,6 +15,7 @@
 import os
 import netaddr
 import yaml
+import pytest
 
 from devops.helpers import helpers
 from devops.helpers.helpers import ssh_client
@@ -152,7 +153,9 @@
 
     @property
     def __nested_resources(self):
-        resources = []
+        if hasattr(pytest, 'resources'):
+            return pytest.resources
+        pytest.resources = list()
         stacks = [s for s in self.__stacks.list(show_nested=True)]
         current_stack_id = self._current_stack.id
         for stack in stacks:
@@ -161,11 +164,11 @@
                 # Add resources to list
                 LOG.info("Get resources from stack {0}"
                          .format(stack.stack_name))
-                resources.extend([
+                pytest.resources.extend([
                     res for res in self.__resources.list(stack.id)
                 ])
-        LOG.info("Found {0} resources".format(len(resources)))
-        return resources
+        LOG.info("Found {0} resources".format(len(pytest.resources)))
+        return pytest.resources
 
     def _get_resources_by_type(self, resource_type):
         res = []
@@ -447,10 +450,8 @@
                     "Waiting for finish the bootstrap process on the nodes "
                     "with accessible SSH")
 
-        check_cloudinit_started = '[ -f /is_cloud_init_started ]'
         check_cloudinit_finished = ('[ -f /is_cloud_init_finished ] || '
                                     '[ -f /var/log/mcp/.bootstrap_done ]')
-        check_cloudinit_failed = 'cat /is_cloud_init_failed'
         passed = {}
         nodes_by_roles = self._get_nodes_by_roles(roles=underlay_node_roles)
         for node in nodes_by_roles:
@@ -464,11 +465,12 @@
             LOG.info("Waiting for SSH on node '{0}' / {1} ...".format(
                 node['name'], node_ip))
 
-            def _ssh_check(host,
-                           port,
-                           username=settings.SSH_NODE_CREDENTIALS['login'],
-                           password=settings.SSH_NODE_CREDENTIALS['password'],
-                           timeout=0):
+            def _readiness_check(
+                    host,
+                    port,
+                    username=settings.SSH_NODE_CREDENTIALS['login'],
+                    password=settings.SSH_NODE_CREDENTIALS['password'],
+                    timeout=0):
                 try:
                     ssh = ssh_client.SSHClient(
                         host=host, port=port,
@@ -476,21 +478,31 @@
                             username=username,
                             password=password))
 
-                    # If '/is_cloud_init_started' exists, then wait for
-                    # the flag /is_cloud_init_finished
-                    if ssh.execute(check_cloudinit_started)['exit_code'] == 0:
-                        result = ssh.execute(check_cloudinit_failed)
-                        if result['exit_code'] == 0:
-                            raise exceptions.EnvironmentNodeIsNotStarted(
-                                "{0}:{1}".format(host, port),
-                                result.stdout_str)
-
-                        status = ssh.execute(
+                    def bootstrap_is_successful():
+                        is_cloudinit_completed = ssh.execute(
+                            "tail -n1 /var/log/cloud-init.log |"
+                            "grep -q 'finish: modules-final: SUCCESS'"
+                                           )['exit_code'] == 0
+                        # The cfg node is rebooted during bootstrap, so its
+                        # logs have no 'finish: modules-final: SUCCESS' line.
+                        # For that node the /var/log/mcp/.bootstrap_done file
+                        # is used as the completion flag instead (checked
+                        # below via check_cloudinit_finished).
+                        is_manually_set_flag = ssh.execute(
                             check_cloudinit_finished)['exit_code'] == 0
-                    # Else, just wait for SSH
+
+                        return is_cloudinit_completed or is_manually_set_flag
+
+                    def no_cloudinit():
+                        return ssh.execute("[ ! -d /var/lib/cloud/instance ]"
+                                           )['exit_code'] == 0
+
+                    if bootstrap_is_successful():
+                        return True
+                    elif no_cloudinit():
+                        return ssh.execute('echo ok')['exit_code'] == 0
                     else:
-                        status = ssh.execute('echo ok')['exit_code'] == 0
-                    return status
+                        return False
 
                 except (AuthenticationException, BadAuthenticationType):
                     return True
@@ -519,8 +531,10 @@
                     ip = self.node_ip(_node)
                     if ip not in passed:
                         passed[ip] = 0
-                    if _ssh_check(ip, port):
+                    if _readiness_check(ip, port):
                         passed[ip] += 1
+                        LOG.info("{} is already ready".format(_node['name']))
+                        nodes_by_roles.remove(_node)
                     else:
                         passed[ip] = 0