blob: 1f28a92ca1cd27b5737b639578eb8032d7c89094 [file] [log] [blame]
#! /bin/bash
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Strict mode: abort on the first failing command, on any use of an unset
# variable, and when any stage of a pipeline fails.
set -e
set -u
set -o pipefail

# Kernel argument that selects the userspace target booted by the kdump kernel.
readonly BOOT_TARGET="systemd.unit=kdump-save-dump.service"
# Saved dumps are written to "${DUMPFILE_PREFIX}<timestamp>".
readonly DUMPFILE_PREFIX="/var/kdump-"
# Symlink that points at the most recently saved dump.
readonly DUMPFILE_LN="/var/kdump"
# Partition that is mounted to reach efi/boot/grub.cfg.
readonly ESP_PARTITION="/dev/sda12"
# Kernel image handed to kexec as the crash (kdump) kernel.
readonly KDUMP_KERNEL="/boot/kdump/vmlinuz"
# Name of this script for the usage text. Parameter expansion is used instead
# of `basename ${0}` so that a path containing whitespace is not word-split and
# no command status is hidden behind `readonly`.
readonly PROG_NAME="${0##*/}"
# Value of the crashkernel= argument that is inserted into grub.cfg.
readonly CRASHKERNEL_SIZE="0M-8G:64M,8G-128G:256M,128G-:512M"
#
# usage <exit_code>
#
# Write the help text to stdout, then terminate the script with <exit_code>.
#
# The intended life cycle of the feature is:
# enable -> (reboot) -> (auto) load -> (crash) -> (auto) savecore -> disable
#
usage() {
  local -r status="${1}"
  # One array element per output line.
  local -r -a help_lines=(
    "Usage:"
    "${PROG_NAME} {show|cleanup|enable|disable|load|savecore|help}"
    "show - Show kdump status, kexec command, and any current parameters."
    "cleanup - Cleanup the collected kernel crash dumps."
    "enable - Enable kdump feature (effective after reboot)."
    "disable - Disable kdump feature (effective after reboot)."
    "load - Load kdump kernel if kdump is enabled."
    "savecore - Save /proc/vmcore (only viable when running kdump kernel)."
    "help - Print this message."
  )

  printf '%s\n' "${help_lines[@]}"
  exit "${status}"
}
#
# check_kdump_ready [cmdline_file] [crash_size_file]
#
# Answer the question "are we ready to load kdump kernel?" from:
# 1. crashkernel command line parameter.
# 2. /sys/kernel/kexec_crash_size.
#
# Both arguments are optional; they default to /proc/cmdline and
# /sys/kernel/kexec_crash_size and exist so the check can be pointed at other
# files.
#
# Returns 0 when a crashkernel= parameter is present and the reserved size is
# non-zero, 1 otherwise (including when either file cannot be read).
#
check_kdump_ready() {
  local cmdline_file="${1:-/proc/cmdline}"
  local crash_size_file="${2:-/sys/kernel/kexec_crash_size}"
  local crash_size=""

  # Not ready if the crashkernel parameter is absent from the command line.
  # `! grep -q` is used rather than `grep -qv`: the latter asks "is there a
  # line that does not match", which gives the wrong answer for empty or
  # multi-line input.
  if ! grep -qi -- "crashkernel=" "${cmdline_file}"; then
    return 1
  fi

  # Not ready if the reserved size cannot be read at all.
  if [[ ! -r "${crash_size_file}" ]]; then
    return 1
  fi
  crash_size="$(cat "${crash_size_file}")" || return 1

  # Not ready if memory reservation for the kdump kernel failed.
  if [[ -z "${crash_size}" || "${crash_size}" == 0 ]]; then
    return 1
  fi
  return 0
}
#
# check_kdump_enabled
#
# Answer the question "will kdump be enabled after next reboot?" from:
# The kernel commandline in grub.cfg.
#
# Mounts ${ESP_PARTITION} on a temporary directory and looks for a
# crashkernel= argument in efi/boot/grub.cfg.
#
# Returns 0 if the argument is present, 1 otherwise (including when the
# partition cannot be mounted or grub.cfg cannot be read).
#
check_kdump_enabled() {
  local dir
  local ret=1

  # Callers use this function as an `if` condition, where `set -e` is
  # suppressed for the whole function body, so every step is checked here.
  if ! dir="$(mktemp -d)"; then
    echo "check_kdump_enabled: cannot create a temporary mount point." >&2
    return 1
  fi
  if ! mount "${ESP_PARTITION}" "${dir}"; then
    # Nothing is mounted, so only the empty directory has to go.
    rmdir "${dir}" 2>/dev/null || true
    return 1
  fi

  if grep -iq "crashkernel=" "${dir}/efi/boot/grub.cfg"; then
    ret=0
  fi

  if ! umount "${dir}"; then
    # The partition is still mounted: removing the directory recursively
    # would delete the files on it, so leave everything in place.
    echo "check_kdump_enabled: cannot unmount ${dir}; leaving it in place." >&2
    return "${ret}"
  fi
  # rmdir (not rm -rf) so that only an empty mount point is ever removed.
  if ! rmdir "${dir}"; then
    echo "check_kdump_enabled: cannot remove ${dir}." >&2
  fi
  return "${ret}"
}
#
# construct_commandline [cmdline_file]
#
# Construct the kernel commandline for kdump kernel and print it to stdout.
#
# The kdump kernel commandline has below args:
# 1. noinitrd console=ttyS0: Same as primary kernel commandline.
# 2. maxcpus=1: Because kdump kernel functionality only supports single CPU.
# 3. root=PARTUUID=XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:
# UUID of the block device containing the rootfs (reusing the one of primary
# kernel).
# 4. ${BOOT_TARGET}: The userspace target used for kernel dump.
#
# cmdline_file is optional and defaults to /proc/cmdline.
#
# Returns 1 without printing a commandline if cmdline_file cannot be read.
# If no PARTUUID is found, a warning goes to stderr and the commandline is
# still printed with an empty root= value.
#
construct_commandline() {
  local -r cmdline_file="${1:-/proc/cmdline}"
  # Group 2 captures the "PARTUUID=<uuid>" value of any "<key>=PARTUUID=<uuid>"
  # argument, for example from:
  # root=PARTUUID=4AD47D21-C3E5-1F4E-BDB3-0336CC5AFADC
  # or
  # payload=PARTUUID=4AD47D21-C3E5-1F4E-BDB3-0336CC5AFADC
  # Group 1 lets the argument sit at the very start of the line as well as
  # after whitespace.
  local -r partuuid_regex='(^|.*[[:space:]])[^[:space:]]*=(PARTUUID=[^[:space:]]*).*'
  local partuuid_arg

  # Declaration and assignment are separate so a sed failure is not masked.
  if ! partuuid_arg="$(sed -En "s/${partuuid_regex}/\2/p" "${cmdline_file}")"; then
    echo "construct_commandline: cannot read ${cmdline_file}." >&2
    return 1
  fi
  if [[ -z "${partuuid_arg}" ]]; then
    echo "construct_commandline: no PARTUUID= argument in ${cmdline_file}." >&2
  fi
  echo "${BOOT_TARGET} noinitrd console=ttyS0 root=${partuuid_arg} maxcpus=1"
}
#
# kdump_enable
#
# Enable kdump feature by modifying kernel commandline in grub.cfg.
# Takes effect after reboot.
#
# Mounts ${ESP_PARTITION} on a temporary directory and, in efi/boot/grub.cfg,
# appends crashkernel=${CRASHKERNEL_SIZE} and the NMI panic arguments after
# every " cros_efi " token.
#
# Returns 0 on success and non-zero if any step failed. The partition is
# unmounted again even when editing grub.cfg fails.
#
kdump_enable() {
  local -r search=" cros_efi "
  local replace=" cros_efi crashkernel=${CRASHKERNEL_SIZE} "
  replace+="unknown_nmi_panic=1 panic_on_unrecovered_nmi=1 "
  local dir
  local ret=0

  # Declaration and assignment are separate so a mktemp failure is not masked.
  if ! dir="$(mktemp -d)"; then
    echo "kdump_enable: cannot create a temporary mount point." >&2
    return 1
  fi
  if ! mount "${ESP_PARTITION}" "${dir}"; then
    # Nothing is mounted, so only the empty directory has to go.
    rmdir "${dir}" 2>/dev/null || true
    return 1
  fi

  # Remember a failure instead of aborting, so the partition is never left
  # mounted behind us.
  sed -i "s/${search}/${replace}/g" "${dir}/efi/boot/grub.cfg" || ret=$?
  sync || ret=$?

  if ! umount "${dir}"; then
    # Still mounted: a recursive delete would remove the files on the
    # partition, so leave everything in place.
    echo "kdump_enable: cannot unmount ${dir}; leaving it in place." >&2
    return 1
  fi
  # rmdir (not rm -rf) so that only an empty mount point is ever removed.
  rmdir "${dir}" || ret=$?
  return "${ret}"
}
#
# kdump_disable
#
# Disable kdump feature by modifying kernel commandline in grub.cfg.
# Takes effect after reboot.
#
# Mounts ${ESP_PARTITION} on a temporary directory and removes every
# crashkernel=, unknown_nmi_panic= and panic_on_unrecovered_nmi= argument
# (together with the space in front of it) from efi/boot/grub.cfg.
#
# Returns 0 on success and non-zero if any step failed. The partition is
# unmounted again even when editing grub.cfg fails.
#
kdump_disable() {
  local dir
  local ret=0

  # Declaration and assignment are separate so a mktemp failure is not masked.
  if ! dir="$(mktemp -d)"; then
    echo "kdump_disable: cannot create a temporary mount point." >&2
    return 1
  fi
  if ! mount "${ESP_PARTITION}" "${dir}"; then
    # Nothing is mounted, so only the empty directory has to go.
    rmdir "${dir}" 2>/dev/null || true
    return 1
  fi

  # One sed pass applies the three substitutions in order to each line, which
  # yields the same file as three consecutive in-place rewrites. A failure is
  # remembered instead of aborting, so the partition is never left mounted.
  sed -i \
    -e "s/ crashkernel=[^[:space:]]*//g" \
    -e "s/ unknown_nmi_panic=[^[:space:]]*//g" \
    -e "s/ panic_on_unrecovered_nmi=[^[:space:]]*//g" \
    "${dir}/efi/boot/grub.cfg" || ret=$?
  sync || ret=$?

  if ! umount "${dir}"; then
    # Still mounted: a recursive delete would remove the files on the
    # partition, so leave everything in place.
    echo "kdump_disable: cannot unmount ${dir}; leaving it in place." >&2
    return 1
  fi
  # rmdir (not rm -rf) so that only an empty mount point is ever removed.
  rmdir "${dir}" || ret=$?
  return "${ret}"
}
#
# kdump_save_core
#
# Run makedumpfile on /proc/vmcore and write the result to a timestamped file
# named "${DUMPFILE_PREFIX}<YYYYmmddHHMM>", then repoint the ${DUMPFILE_LN}
# symlink at that file and flush it to disk.
#
kdump_save_core() {
  local -r stamp="$(date +'%Y%m%d%H%M')"
  local -r dump_path="${DUMPFILE_PREFIX}${stamp}"

  printf '%s\n' "Saving vmcore to ${dump_path}."
  makedumpfile -c -d 31 /proc/vmcore "${dump_path}"

  # Replace any previous link so it always names the newest dump.
  rm -f "${DUMPFILE_LN}"
  ln -s "${dump_path}" "${DUMPFILE_LN}"

  printf '%s\n' "sync-ing."
  sync
  printf '%s\n' "kdump finished."
}
#
# kdump_load
#
# Hand ${KDUMP_KERNEL} to kexec as the crash kernel, using the commandline
# produced by construct_commandline, and report what was loaded.
#
kdump_load() {
  local -r append_args="$(construct_commandline)"

  kexec -s -p "${KDUMP_KERNEL}" --append="${append_args}"
  printf '%s\n' \
    "Loaded kdump kernel ${KDUMP_KERNEL} with command line parameter:" \
    "${append_args}"
}
#
# kdump_show
#
# Show the current state of kdump feature.
#
# Prints whether kdump is enabled (check_kdump_enabled), ready
# (check_kdump_ready) and loaded (/sys/kernel/kexec_crash_loaded). When a
# kdump kernel is loaded, the commandline from construct_commandline is
# printed too. When ${DUMPFILE_LN} resolves to a regular file, the collected
# dumps matching ${DUMPFILE_PREFIX}* are listed.
#
kdump_show() {
  local kdump_ready=false
  local kdump_enabled=false
  local kdump_loaded=false

  if check_kdump_ready; then
    kdump_ready=true
  fi
  if check_kdump_enabled; then
    kdump_enabled=true
  fi
  # The kernel reports "1" in this file while a crash kernel is loaded.
  # Comparing against "1" rather than "not 0" keeps a missing or unreadable
  # file from being shown as "loaded".
  if [[ "$(cat /sys/kernel/kexec_crash_loaded)" == "1" ]]; then
    kdump_loaded=true
  fi

  echo "kdump enabled: ${kdump_enabled}"
  echo "kdump ready: ${kdump_ready}"
  echo "kdump kernel loaded: ${kdump_loaded}"

  if [[ "${kdump_loaded}" == true ]]; then
    echo "kdump kernel ${KDUMP_KERNEL} is loaded with command line parameter:"
    # Informational only: a failure here must not abort the status report.
    construct_commandline || true
  fi

  # Nothing to list unless the "latest dump" link resolves to a file.
  if [[ ! -f "${DUMPFILE_LN}" ]]; then
    return 0
  fi
  echo "Collected kernel dumps:"
  stat -c "path:%n date:%y size:%s bytes" "${DUMPFILE_PREFIX}"*
}
#
# kdump_cleanup
#
# Delete the ${DUMPFILE_LN} symlink and every file whose name starts with
# ${DUMPFILE_PREFIX}.
#
kdump_cleanup() {
  # Expanded up front; the link itself does not match this pattern.
  local -a dump_files=("${DUMPFILE_PREFIX}"*)

  rm -f "${DUMPFILE_LN}"
  rm -f "${dump_files[@]}"
}
#
# main [command]
#
# Dispatch on the first argument (defaults to "help" when absent):
# show, cleanup, enable, disable, load, savecore or help. Any other value
# prints the usage text and exits with status 1.
#
main() {
  local -r action="${1-help}"
  local message

  case "${action}" in
    show)
      kdump_show
      ;;
    cleanup)
      kdump_cleanup
      ;;
    enable)
      if ! check_kdump_enabled; then
        kdump_enable
      fi
      message="kdump is enabled and needs a reboot to take effect."
      if check_kdump_ready; then
        message="kdump is enabled and ready. No reboot required."
      fi
      echo "${message}"
      ;;
    disable)
      if check_kdump_enabled; then
        kdump_disable
      fi
      message="kdump is disabled completely. No reboot required."
      if check_kdump_ready; then
        message="kdump is disabled and needs a reboot to take effect."
      fi
      echo "${message}"
      ;;
    load)
      if check_kdump_ready; then
        echo "Loading kdump kernel..."
        kdump_load
      else
        echo "kdump feature is not ready. Skipping loading kdump kernel."
        exit 1
      fi
      ;;
    savecore)
      kdump_save_core
      ;;
    help)
      usage 0
      ;;
    *)
      usage 1
      ;;
  esac
}
# Script entry point: pass all command line arguments through to main.
main "$@"