Add tool get_logs.py to download node logs

Change-Id: I834b699ed8968d70b20bf343e4f46df465fc4f5e
diff --git a/jobs/pipelines/swarm-bootstrap-salt-cluster-devops.groovy b/jobs/pipelines/swarm-bootstrap-salt-cluster-devops.groovy
index 64c8783..570f47f 100644
--- a/jobs/pipelines/swarm-bootstrap-salt-cluster-devops.groovy
+++ b/jobs/pipelines/swarm-bootstrap-salt-cluster-devops.groovy
@@ -38,54 +38,54 @@
         error "'PARENT_WORKSPACE' contains path to non-existing directory ${PARENT_WORKSPACE} on the node '${PARENT_NODE_NAME}'."
     }
     dir("${PARENT_WORKSPACE}") {
+        stage("Cleanup: erase ${ENV_NAME} and remove config drive") {
+            println "Remove environment ${ENV_NAME}"
+            shared.run_cmd("""\
+                dos.py erase ${ENV_NAME} || true
+            """)
+            println "Remove config drive ISO"
+            shared.run_cmd("""\
+                rm /home/jenkins/images/${CFG01_CONFIG_IMAGE_NAME} || true
+            """)
+        }
+
+        if (env.TCP_QA_REFS) {
+            stage("Update working dir to patch ${TCP_QA_REFS}") {
+                shared.update_working_dir()
+            }
+        }
+
+        stage("Create an environment ${ENV_NAME} in disabled state") {
+            // deploy_hardware.xml
+            shared.run_cmd("""\
+                export ENV_NAME=${ENV_NAME}
+                export LAB_CONFIG_NAME=${LAB_CONFIG_NAME}
+                export MANAGER=devops
+                export PYTHONIOENCODING=UTF-8
+                export REPOSITORY_SUITE=${MCP_VERSION}
+                export TEST_GROUP=test_create_environment
+                py.test -vvv -s -p no:django -p no:ipdb --junit-xml=deploy_hardware.xml -k \${TEST_GROUP}
+            """)
+        }
+
+        stage("Generate the model") {
+            shared.generate_cookied_model()
+        }
+
+        stage("Generate config drive ISO") {
+            shared.generate_configdrive_iso()
+        }
+
+        stage("Upload generated config drive ISO into volume on cfg01 node") {
+            shared.run_cmd("""\
+                # Get SALT_MASTER_HOSTNAME to determine the volume name
+                . ./tcp_tests/utils/env_salt
+                virsh vol-upload ${ENV_NAME}_\${SALT_MASTER_HOSTNAME}_config /home/jenkins/images/${CFG01_CONFIG_IMAGE_NAME} --pool default
+                virsh pool-refresh --pool default
+            """)
+        }
+
         try {
-            stage("Cleanup: erase ${ENV_NAME} and remove config drive") {
-                println "Remove environment ${ENV_NAME}"
-                shared.run_cmd("""\
-                    dos.py erase ${ENV_NAME} || true
-                """)
-                println "Remove config drive ISO"
-                shared.run_cmd("""\
-                    rm /home/jenkins/images/${CFG01_CONFIG_IMAGE_NAME} || true
-                """)
-            }
-
-            if (env.TCP_QA_REFS) {
-                stage("Update working dir to patch ${TCP_QA_REFS}") {
-                    shared.update_working_dir()
-                }
-            }
-
-            stage("Create an environment ${ENV_NAME} in disabled state") {
-                // deploy_hardware.xml
-                shared.run_cmd("""\
-                    export ENV_NAME=${ENV_NAME}
-                    export LAB_CONFIG_NAME=${LAB_CONFIG_NAME}
-                    export MANAGER=devops
-                    export PYTHONIOENCODING=UTF-8
-                    export REPOSITORY_SUITE=${MCP_VERSION}
-                    export TEST_GROUP=test_create_environment
-                    py.test -vvv -s -p no:django -p no:ipdb --junit-xml=deploy_hardware.xml -k \${TEST_GROUP}
-                """)
-            }
-
-            stage("Generate the model") {
-                shared.generate_cookied_model()
-            }
-
-            stage("Generate config drive ISO") {
-                shared.generate_configdrive_iso()
-            }
-
-            stage("Upload generated config drive ISO into volume on cfg01 node") {
-                shared.run_cmd("""\
-                    # Get SALT_MASTER_HOSTNAME to determine the volume name
-                    . ./tcp_tests/utils/env_salt
-                    virsh vol-upload ${ENV_NAME}_\${SALT_MASTER_HOSTNAME}_config /home/jenkins/images/${CFG01_CONFIG_IMAGE_NAME} --pool default
-                    virsh pool-refresh --pool default
-                """)
-            }
-
             stage("Run the 'underlay' and 'salt-deployed' fixtures to bootstrap salt cluster") {
                 // deploy_salt.xml
                 shared.run_cmd("""\
@@ -103,7 +103,8 @@
             }
 
           } catch (e) {
-              common.printMsg("Job is failed", "purple")
+              common.printMsg("Saltstack cluster deploy is failed", "purple")
+              shared.download_logs("deploy_salt")
               throw e
           } finally {
             // TODO(ddmitriev): analyze the "def currentResult = currentBuild.result ?: 'SUCCESS'"
diff --git a/jobs/pipelines/swarm-deploy-cicd.groovy b/jobs/pipelines/swarm-deploy-cicd.groovy
index 5ace2ca..7d7fd63 100644
--- a/jobs/pipelines/swarm-deploy-cicd.groovy
+++ b/jobs/pipelines/swarm-deploy-cicd.groovy
@@ -29,29 +29,30 @@
         error "'PARENT_WORKSPACE' contains path to non-existing directory ${PARENT_WORKSPACE} on the node '${PARENT_NODE_NAME}'."
     }
     dir("${PARENT_WORKSPACE}") {
-        try {
 
-            if (! env.STACK_INSTALL) {
-                error "'STACK_INSTALL' must contain one or more comma separated stack names for [deploy_openstack] pipeline"
+        if (! env.STACK_INSTALL) {
+            error "'STACK_INSTALL' must contain one or more comma separated stack names for [deploy_openstack] pipeline"
+        }
+
+        if (env.TCP_QA_REFS) {
+            stage("Update working dir to patch ${TCP_QA_REFS}") {
+                shared.update_working_dir()
+            }
+        }
+
+        // Install core and cicd
+        def stack
+        def timeout
+
+        for (element in "${env.STACK_INSTALL}".split(",")) {
+            if (element.contains(':')) {
+                (stack, timeout) = element.split(':')
+            } else {
+                stack = element
+                timeout = '1800'
             }
 
-            if (env.TCP_QA_REFS) {
-                stage("Update working dir to patch ${TCP_QA_REFS}") {
-                    shared.update_working_dir()
-                }
-            }
-
-            // Install core and cicd
-            def stack
-            def timeout
-
-            for (element in "${env.STACK_INSTALL}".split(",")) {
-                if (element.contains(':')) {
-                    (stack, timeout) = element.split(':')
-                } else {
-                    stack = element
-                    timeout = '1800'
-                }
+            try {
                 stage("Run Jenkins job on salt-master [deploy_openstack:${stack}]") {
                     shared.run_job_on_day01_node(stack, timeout)
                 }
@@ -60,23 +61,25 @@
                     shared.sanity_check_component(stack)
                 }
 
-                stage("Make environment snapshot [${stack}_deployed]") {
-                    shared.devops_snapshot(stack)
+            } catch (e) {
+                common.printMsg("Job is failed", "purple")
+                shared.download_logs("deploy_${stack}")
+                throw e
+            } finally {
+                // TODO(ddmitriev): analyze the "def currentResult = currentBuild.result ?: 'SUCCESS'"
+                // and report appropriate data to TestRail
+                // TODO(ddmitriev): add checks for cicd cluster
+                if ("${env.SHUTDOWN_ENV_ON_TEARDOWN}" == "true") {
+                    shared.run_cmd("""\
+                        dos.py destroy ${ENV_NAME}
+                    """)
                 }
             }
 
-        } catch (e) {
-            common.printMsg("Job is failed", "purple")
-            throw e
-        } finally {
-            // TODO(ddmitriev): analyze the "def currentResult = currentBuild.result ?: 'SUCCESS'"
-            // and report appropriate data to TestRail
-            // TODO(ddmitriev): add checks for cicd cluster
-            if ("${env.SHUTDOWN_ENV_ON_TEARDOWN}" == "true") {
-                shared.run_cmd("""\
-                    dos.py destroy ${ENV_NAME}
-                """)
+            stage("Make environment snapshot [${stack}_deployed]") {
+                shared.devops_snapshot(stack)
             }
-        }
-    }
-}
+
+        } // for
+    } // dir
+} // node
diff --git a/jobs/pipelines/swarm-deploy-platform.groovy b/jobs/pipelines/swarm-deploy-platform.groovy
index 9a6b1d1..42ebc7e 100644
--- a/jobs/pipelines/swarm-deploy-platform.groovy
+++ b/jobs/pipelines/swarm-deploy-platform.groovy
@@ -29,29 +29,31 @@
         error "'PARENT_WORKSPACE' contains path to non-existing directory ${PARENT_WORKSPACE} on the node '${PARENT_NODE_NAME}'."
     }
     dir("${PARENT_WORKSPACE}") {
-        try {
 
-            if (! env.STACK_INSTALL) {
-                error "'STACK_INSTALL' must contain one or more comma separated stack names for [deploy_openstack] pipeline"
+        if (! env.STACK_INSTALL) {
+            error "'STACK_INSTALL' must contain one or more comma separated stack names for [deploy_openstack] pipeline"
+        }
+
+        if (env.TCP_QA_REFS) {
+            stage("Update working dir to patch ${TCP_QA_REFS}") {
+                shared.update_working_dir()
+            }
+        }
+
+        // Install the cluster
+        def stack
+        def timeout
+
+        for (element in "${STACK_INSTALL}".split(",")) {
+            if (element.contains(':')) {
+                (stack, timeout) = element.split(':')
+            } else {
+                stack = element
+                timeout = '1800'
             }
 
-            if (env.TCP_QA_REFS) {
-                stage("Update working dir to patch ${TCP_QA_REFS}") {
-                    shared.update_working_dir()
-                }
-            }
+            try {
 
-            // Install the cluster
-            def stack
-            def timeout
-
-            for (element in "${STACK_INSTALL}".split(",")) {
-                if (element.contains(':')) {
-                    (stack, timeout) = element.split(':')
-                } else {
-                    stack = element
-                    timeout = '1800'
-                }
                 stage("Run Jenkins job on CICD [deploy_openstack:${stack}]") {
                     shared.run_job_on_cicd_nodes(stack, timeout)
                 }
@@ -60,23 +62,25 @@
                     shared.sanity_check_component(stack)
                 }
 
-                stage("Make environment snapshot [${stack}_deployed]") {
-                    shared.devops_snapshot(stack)
+            } catch (e) {
+                common.printMsg("Job is failed", "purple")
+                shared.download_logs("deploy_${stack}")
+                throw e
+            } finally {
+                // TODO(ddmitriev): analyze the "def currentResult = currentBuild.result ?: 'SUCCESS'"
+                // and report appropriate data to TestRail
+                // TODO(ddmitriev): add checks for the installed stacks
+                if ("${env.SHUTDOWN_ENV_ON_TEARDOWN}" == "true") {
+                    shared.run_cmd("""\
+                        dos.py destroy ${ENV_NAME}
+                    """)
                 }
             }
 
-        } catch (e) {
-            common.printMsg("Job is failed", "purple")
-            throw e
-        } finally {
-            // TODO(ddmitriev): analyze the "def currentResult = currentBuild.result ?: 'SUCCESS'"
-            // and report appropriate data to TestRail
-            // TODO(ddmitriev): add checks for the installed stacks
-            if ("${env.SHUTDOWN_ENV_ON_TEARDOWN}" == "true") {
-                shared.run_cmd("""\
-                    dos.py destroy ${ENV_NAME}
-                """)
+            stage("Make environment snapshot [${stack}_deployed]") {
+                shared.devops_snapshot(stack)
             }
-        }
-    }
-}
+
+        } // for
+    } // dir
+} // node
diff --git a/jobs/pipelines/swarm-run-pytest.groovy b/jobs/pipelines/swarm-run-pytest.groovy
index 0dd2d7a..bc411f7 100644
--- a/jobs/pipelines/swarm-run-pytest.groovy
+++ b/jobs/pipelines/swarm-run-pytest.groovy
@@ -72,6 +72,7 @@
                     """)
 
                 def snapshot_name = "test_completed"
+                shared.download_logs("test_completed")
                 shared.run_cmd("""\
                     dos.py suspend ${ENV_NAME}
                     dos.py snapshot ${ENV_NAME} ${snapshot_name}
@@ -86,6 +87,9 @@
 
         } catch (e) {
             common.printMsg("Job is failed", "purple")
+            // Downloading logs usually not needed here
+            // because tests should use the decorator @pytest.mark.grab_versions
+            // shared.download_logs("test_failed")
             throw e
         } finally {
             // TODO(ddmitriev): analyze the "def currentResult = currentBuild.result ?: 'SUCCESS'"
diff --git a/jobs/pipelines/swarm-testrail-report.groovy b/jobs/pipelines/swarm-testrail-report.groovy
index e1fadc7..42027f0 100644
--- a/jobs/pipelines/swarm-testrail-report.groovy
+++ b/jobs/pipelines/swarm-testrail-report.groovy
@@ -76,7 +76,7 @@
                     report_url = report_result.split("\n").each {
                         if (it.contains("[TestRun URL]")) {
                             common.printMsg("Found report URL: " + it.trim().split().last(), "blue")
-                            description += "<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
+                            description += "\n<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
                         }
                     }
                 }
@@ -98,7 +98,7 @@
                     report_url = report_result.split("\n").each {
                         if (it.contains("[TestRun URL]")) {
                             common.printMsg("Found report URL: " + it.trim().split().last(), "blue")
-                            description += "<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
+                            description += "\n<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
                         }
                     }
                 }
@@ -115,7 +115,7 @@
                     report_url = report_result.split("\n").each {
                         if (it.contains("[TestRun URL]")) {
                             common.printMsg("Found report URL: " + it.trim().split().last(), "blue")
-                            description += "<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
+                            description += "\n<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
                         }
                     }
                 }
@@ -144,7 +144,7 @@
                     report_url = report_result.split("\n").each {
                         if (it.contains("[TestRun URL]")) {
                             common.printMsg("Found report URL: " + it.trim().split().last(), "blue")
-                            description += "<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
+                            description += "\n<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
                         }
                     }
                 }
@@ -161,7 +161,7 @@
                     report_url = report_result.split("\n").each {
                         if (it.contains("[TestRun URL]")) {
                             common.printMsg("Found report URL: " + it.trim().split().last(), "blue")
-                            description += "<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
+                            description += "\n<a href=" + it.trim().split().last() + ">${testSuiteName}</a>"
                         }
                     }
                 }
diff --git a/src/com/mirantis/system_qa/SharedPipeline.groovy b/src/com/mirantis/system_qa/SharedPipeline.groovy
index 4a262d2..9b5621d 100644
--- a/src/com/mirantis/system_qa/SharedPipeline.groovy
+++ b/src/com/mirantis/system_qa/SharedPipeline.groovy
@@ -396,6 +396,17 @@
     }
 }
 
+def download_logs(archive_name_prefix) {
+    // Archive and download logs and debug info from salt nodes in the lab
+    // Do not fail in case of error to not lose the original error from the parent exception.
+    def common = new com.mirantis.mk.Common()
+    common.printMsg("Downloading nodes logs by ${archive_name_prefix}", "blue")
+    run_cmd("""\
+        export TESTS_CONFIGS=\$(pwd)/${ENV_NAME}_salt_deployed.ini
+        ./tcp_tests/utils/get_logs.py --archive-name-prefix ${archive_name_prefix} || true
+    """)
+}
+
 def devops_snapshot_info(snapshot_name) {
     // Print helper message after snapshot
     def common = new com.mirantis.mk.Common()
diff --git a/tcp_tests/managers/underlay_ssh_manager.py b/tcp_tests/managers/underlay_ssh_manager.py
index 5e7995c..0bfb463 100644
--- a/tcp_tests/managers/underlay_ssh_manager.py
+++ b/tcp_tests/managers/underlay_ssh_manager.py
@@ -398,12 +398,12 @@
             "docker ps > /root/\$(hostname -f)/dump_docker_ps.txt;"
             "docker service ls > "
             "  /root/\$(hostname -f)/dump_docker_services_ls.txt;"
-            "for SERVICE in \$(docker service ls | awk '{ print $2 }'); "
+            "for SERVICE in \$(docker service ls | awk '{ print \$2 }'); "
             "  do docker service ps --no-trunc 2>&1 \$SERVICE >> "
             "    /root/\$(hostname -f)/dump_docker_service_ps.txt;"
             "  done;"
-            "for SERVICE in \$(docker service ls | awk '{ print $2 }'); "
-            "  do docker service logs 2>&1 \$SERVICE > "
+            "for SERVICE in \$(docker service ls | awk '{ print \$2 }'); "
+            "  do timeout 30 docker service logs --no-trunc 2>&1 \$SERVICE > "
             "    /root/\$(hostname -f)/dump_docker_service_\${SERVICE}_logs;"
             "  done;"
             "vgdisplay > /root/\$(hostname -f)/dump_vgdisplay.txt;"
diff --git a/tcp_tests/utils/get_logs.py b/tcp_tests/utils/get_logs.py
new file mode 100755
index 0000000..225f9d7
--- /dev/null
+++ b/tcp_tests/utils/get_logs.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+import sys
+import time
+
+sys.path.append(os.getcwd())
+try:
+    from tcp_tests.fixtures import config_fixtures
+    from tcp_tests.managers import underlay_ssh_manager
+except ImportError:
+    print("ImportError: Run the application from the tcp-qa directory or "
+          "set the PYTHONPATH environment variable to directory which contains"
+          " ./tcp_tests")
+    sys.exit(1)
+
+
+def load_params():
+    """
+    Parse CLI arguments and environment variables
+
+    Returns: ArgumentParser instance
+    """
+    parser = argparse.ArgumentParser(description=(
+        'Download logs and debug info from salt minions'
+    ))
+    default_name_prefix = 'logs_' + time.strftime("%Y%m%d_%H%M%S")
+    parser.add_argument('--archive-name-prefix',
+                        help=('Custom prefix for creating archive name'),
+                        default=default_name_prefix,
+                        type=str)
+    return parser
+
+
+def main():
+    parser = load_params()
+    opts = parser.parse_args()
+
+    tests_configs = os.environ.get('TESTS_CONFIGS', None)
+    if not tests_configs or not os.path.isfile(tests_configs):
+        print("Download logs and debug info from salt minions. "
+              "Please set TESTS_CONFIGS environment variable whith"
+              "the path to INI file with lab metadata.")
+        return 11
+
+    config = config_fixtures.config()
+    underlay = underlay_ssh_manager.UnderlaySSHManager(config)
+
+    underlay.get_logs(opts.archive_name_prefix)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/tcp_tests/utils/run_jenkins_job.py b/tcp_tests/utils/run_jenkins_job.py
index b01f366..acc2e9f 100755
--- a/tcp_tests/utils/run_jenkins_job.py
+++ b/tcp_tests/utils/run_jenkins_job.py
@@ -4,7 +4,6 @@
 import os
 import sys
 
-from devops import error
 import json
 
 sys.path.append(os.getcwd())
@@ -140,7 +139,7 @@
             interval=1,
             verbose=opts.verbose,
             job_output_prefix=opts.job_output_prefix)
-    except error.TimeoutError as e:
+    except Exception as e:
         print(str(e))
         raise