Add possibility to gather kernel crash dumps
Related-Prod: PROD-29189
Change-Id: Ic445e6fd47b6fbba1df8b4f561546692914a1974
diff --git a/linux/files/kdump-tools b/linux/files/kdump-tools
new file mode 100644
index 0000000..5ba75b8
--- /dev/null
+++ b/linux/files/kdump-tools
@@ -0,0 +1,101 @@
+# kdump-tools configuration
+# ---------------------------------------------------------------------------
+# USE_KDUMP - controls whether kdump will be configured
+# 0 - kdump kernel will not be loaded
+# 1 - kdump kernel will be loaded and kdump is configured
+# KDUMP_SYSCTL - controls when a panic occurs, using the sysctl
+# interface. The contents of this variable should be the
+# "variable=value ..." portion of the 'sysctl -w ' command.
+# If not set, the default value "kernel.panic_on_oops=1" will
+# be used. Disable this feature by setting KDUMP_SYSCTL=" "
+# Example - also panic on oom:
+# KDUMP_SYSCTL="kernel.panic_on_oops=1 vm.panic_on_oom=1"
+#
+USE_KDUMP=1
+#KDUMP_SYSCTL="kernel.panic_on_oops=1"
+
+
+# ---------------------------------------------------------------------------
+# Kdump Kernel:
+# KDUMP_KERNEL - A full pathname to a kdump kernel.
+# KDUMP_INITRD - A full pathname to the kdump initrd (if used).
+# If these are not set, kdump-config will try to use the current kernel
+# and initrd if it is relocatable. Otherwise, you will need to specify
+# these manually.
+KDUMP_KERNEL=/var/lib/kdump/vmlinuz
+KDUMP_INITRD=/var/lib/kdump/initrd.img
+
+
+# ---------------------------------------------------------------------------
+# vmcore Handling:
+# KDUMP_COREDIR - local path to save the vmcore to.
+# KDUMP_FAIL_CMD - This variable can be used to cause a reboot or
+# start a shell if saving the vmcore fails. If not set, "reboot -f"
+# is the default.
+# Example - start a shell if the vmcore copy fails:
+# KDUMP_FAIL_CMD="echo 'makedumpfile FAILED.'; /bin/bash; reboot -f"
+# KDUMP_DUMP_DMESG - This variable controls if the dmesg buffer is dumped.
+# If unset or set to 1, the dmesg buffer is dumped. If set to 0, the dmesg
+# buffer is not dumped.
+# KDUMP_NUM_DUMPS - This variable controls how many dump files are kept on
+# the machine to prevent running out of disk space. If set to 0 or unset,
+# the variable is ignored and no dump files are automatically purged.
+KDUMP_COREDIR="/var/crash"
+#KDUMP_FAIL_CMD="reboot -f"
+#KDUMP_DUMP_DMESG=
+#KDUMP_NUM_DUMPS=
+
+
+# ---------------------------------------------------------------------------
+# Makedumpfile options:
+# MAKEDUMP_ARGS - extra arguments passed to makedumpfile (8). The default,
+# if unset, is to pass '-c -d 31' telling makedumpfile to use compression
+# and reduce the corefile to in-use kernel pages only.
+#MAKEDUMP_ARGS="-c -d 31"
+
+
+# ---------------------------------------------------------------------------
+# Kexec/Kdump args
+# KDUMP_KEXEC_ARGS - Additional arguments to the kexec command used to load
+# the kdump kernel
+# Example - Use this option on x86 systems with PAE and more than
+# 4 gig of memory:
+# KDUMP_KEXEC_ARGS="--elf64-core-headers"
+# KDUMP_CMDLINE - The default is to use the contents of /proc/cmdline.
+# Set this variable to override /proc/cmdline.
+# KDUMP_CMDLINE_APPEND - Additional arguments to append to the command line
+# for the kdump kernel. If unset, it defaults to
+# "reset_devices nr_cpus=1 systemd.unit=kdump-tools-dump.service irqpoll nousb ata_piix.prefer_ms_hyperv=0"
+#KDUMP_KEXEC_ARGS=""
+#KDUMP_CMDLINE=""
+#KDUMP_CMDLINE_APPEND="reset_devices nr_cpus=1 systemd.unit=kdump-tools-dump.service irqpoll nousb ata_piix.prefer_ms_hyperv=0"
+
+# ---------------------------------------------------------------------------
+# Architecture specific Overrides:
+
+# ---------------------------------------------------------------------------
+# Remote dump facilities:
+# SSH - username and hostname of the remote server that will receive the dump
+# and dmesg files.
+# SSH_KEY - Full path of the ssh private key to be used to login to the remote
+# server. Use 'kdump-config propagate' to send the public key to the
+# remote server
+# HOSTTAG - Select if hostname or IP address will be used as a prefix to the
+# timestamped directory when sending files to the remote server.
+# 'ip' is the default.
+# NFS - Hostname and mount point of the NFS server configured to receive
+# the crash dump. The syntax must be {HOSTNAME}:{MOUNTPOINT}
+# (e.g. remote:/var/crash)
+# NFS_TIMEO - Timeout before NFS retries a request. See man nfs(5) for details.
+# NFS_RETRANS - Number of times NFS client retries a request. See man nfs(5) for details.
+# SSH="<user at server>"
+#
+# SSH_KEY="<path>"
+#
+# HOSTTAG="hostname|[ip]"
+#
+# NFS="<nfs mount>"
+#
+# NFS_TIMEO="600"
+#
+# NFS_RETRANS="3"
diff --git a/linux/files/kexec b/linux/files/kexec
new file mode 100644
index 0000000..b054eed
--- /dev/null
+++ b/linux/files/kexec
@@ -0,0 +1,15 @@
+# Defaults for kexec initscript
+# sourced by /etc/init.d/kexec and /etc/init.d/kexec-load
+
+# Load a kexec kernel (true/false)
+LOAD_KEXEC=true
+
+# Kernel and initrd image
+KERNEL_IMAGE="/vmlinuz"
+INITRD="/initrd.img"
+
+# If empty, use current /proc/cmdline
+APPEND=""
+
+# Load the default kernel from grub config (true/false)
+USE_GRUB_CONFIG=false
diff --git a/linux/map.jinja b/linux/map.jinja
index 276d526..ce0f61a 100644
--- a/linux/map.jinja
+++ b/linux/map.jinja
@@ -68,6 +68,9 @@
'enabled': false,
'name': 'deadline',
},
+ 'kernel_crash_dump': {
+ 'enabled': false
+ }
},
'RedHat': {
'pkgs': ['policycoreutils', 'policycoreutils-python', 'telnet', 'wget'],
diff --git a/linux/system/init.sls b/linux/system/init.sls
index bc9b20d..5977dd6 100644
--- a/linux/system/init.sls
+++ b/linux/system/init.sls
@@ -130,3 +130,6 @@
{%- if system.mcelog is defined %}
- linux.system.mcelog
{%- endif %}
+{%- if system.kernel_crash_dump is defined %}
+- linux.system.kernel_crash_dump
+{%- endif %}
diff --git a/linux/system/kernel_crash_dump.sls b/linux/system/kernel_crash_dump.sls
new file mode 100644
index 0000000..880eb36
--- /dev/null
+++ b/linux/system/kernel_crash_dump.sls
@@ -0,0 +1,43 @@
+{%- from "linux/map.jinja" import system with context %}
+
+{%- if system.kernel_crash_dump.enabled %}
+
+kernel_crash_dump_packages:  # enabled branch: install the linux-crashdump package
+ pkg.installed:
+ - name: linux-crashdump
+
+kexec_defaults:  # manage /etc/default/kexec from the file shipped with this formula
+ file.managed:
+ - name: /etc/default/kexec
+ - source: salt://linux/files/kexec
+ - user: root
+ - mode: 644
+ - template: jinja  # source file is passed through the Jinja renderer
+ - require:
+ - pkg: kernel_crash_dump_packages  # only after the package state has succeeded
+
+kdump_tools_defaults:  # manage /etc/default/kdump-tools from the file shipped with this formula
+ file.managed:
+ - name: /etc/default/kdump-tools
+ - source: salt://linux/files/kdump-tools
+ - user: root
+ - mode: 644
+ - template: jinja  # source file is passed through the Jinja renderer
+ - require:
+ - pkg: kernel_crash_dump_packages  # only after the package state has succeeded
+
+{%- else %}
+
+kernel_crash_dump_defaults_purge:  # disabled branch: remove both defaults files
+ file.absent:
+ - names:
+ - /etc/default/kexec
+ - /etc/default/kdump-tools
+ - require:
+ - pkg: kernel_crash_dump_pkg_purge  # runs after the package purge declared below
+
+kernel_crash_dump_pkg_purge:  # disabled branch: purge the linux-crashdump package
+ pkg.purged:
+ - name: linux-crashdump
+
+{%- endif %}