blob: de2010f5757c8ad4f88d85032a2fc36d737e4911 [file] [log] [blame]
# Copyright 2017 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This file is parsed by chromeos::KeyValueStore. It has the format:
# <exec_name>=<shell command>\n
# When a collector processes a crash, the corresponding command is executed and
# its standard output and standard error are attached to the crash report.
# The <exec_name> corresponds to exec_name used in the various collectors.
# For OS programs that crash (e.g. coredump), this will be the program's
# basename. For example, if /usr/bin/powerd crashes, it will be "powerd".
# For other collectors, the exec_name might have other values, so you'll have
# to consult the collector itself. For example, kernel warnings might be one
# of "kernel-warning", or "kernel-wifi-warning", or "kernel-suspend-warning".
# Shell commands may be split across multiple lines using trailing backslashes.
# The contents of the log will be stripped for some potentially sensitive info
# (see CrashCollector::StripSensitiveData for a description of what gets
# stripped), but try to avoid collection of potential PII at all times.
# Use caution in modifying this file. Only run common Unix commands here, as
# these commands will be run when a crash has recently occurred and we should
# avoid running anything that might cause another crash. Similarly, these
# commands block notification of the crash to parent processes, so commands
# should execute quickly.
# Attach the last 50000 bytes of the five most recently modified files in
# /var/log/update_engine, concatenated oldest first.
update_engine=cat $(ls -1tr /var/log/update_engine | tail -5 \
| sed 's|^|/var/log/update_engine/|') \
| tail -c 50000
# Append the authpolicy logs when authpolicyd crashes. Authpolicyd is the daemon
# that handles Active Directory (AD) device management. It is disabled if AD
# management is not used, e.g. for ordinary cloud managed Chromebooks.
echo "===authpolicyd output==="; \
tail -c 50000 /var/log/authpolicy.log
# cros_installer writes its output into the update engine log files, so this
# rule attaches the same data as update_engine: the last 50000 bytes of the
# five most recently modified files in /var/log/update_engine.
cros_installer=cat $(ls -1tr /var/log/update_engine | tail -5 \
| sed 's|^|/var/log/update_engine/|') \
| tail -c 50000
# Dump the last 20 lines of the last two files in Chrome's system and user log
# directories, along with the last 20 messages from the session manager.
# vmlog is included to detect if the system is swapping heavily.
# The process list is limited in width to save upload size. 120 columns is
# enough to capture the chrome process --type switch.
for f in $(ls -1rt /var/log/chrome/chrome_[0-9]* | tail -2) \
$(ls -1rt /home/chronos/u-*/log/chrome* 2>/dev/null | tail -2); do \
echo "===$f (tail)==="; \
tail -20 $f; \
echo EOF; \
echo; \
done; \
echo "===session_manager (tail)==="; \
awk '$3 ~ "^session_manager\\[" { print }' /var/log/messages | tail -20; \
echo EOF; \
echo ; \
echo "===top 100 processes by memory==="; \
ps ax -o %mem,rss,pid,user,args --sort -%mem --cols 120 | head --lines=100; \
echo EOF; \
echo ; \
echo "===vmlog.LATEST (tail)==="; \
tail -200 /var/log/vmlog/vmlog.LATEST; \
echo EOF
# Just grab /home/chronos/user/lacros/lacros.log
echo "===lacros.log (tail)==="; \
tail -100 /home/chronos/user/lacros/lacros.log; \
echo EOF
# For JavaScript errors from Chrome's SendJavaScriptErrorReport, just include
# the current Chrome logs. Session_manager logs aren't useful, and the previous
# Chrome log isn't useful (since JavaScript can't restart Chrome like a crash
# does). But do show the last few JavaScript printouts (usually marked with
# "CONSOLE") even if C++ is spitting out lots of irrelevant messages.
# The loop must be closed by a final "done" with no trailing backslash so that
# the command ends here instead of absorbing the lines that follow.
# NOTE(review): no `<exec_name>=` line is visible ahead of this command;
# confirm the rule name is present.
for f in $(ls -1rt /var/log/chrome/chrome_[0-9]* | tail -1) \
$(ls -1rt /home/chronos/u-*/log/chrome* 2>/dev/null | tail -1); do \
echo "===$f (tail)==="; \
tail -20 "$f"; \
echo EOF; \
echo; \
echo "===$f (CONSOLE tail)==="; \
tail -1000 "$f" | grep CONSOLE | tail -20; \
echo EOF; \
echo; \
done
# Dump the last DriveFS instance log for every account in every profile,
# last syslog messages related to either drivefs or cros-disks,
# and DriveFS' data directory structure.
# grep -E is used instead of egrep: newer GNU grep releases report egrep as
# obsolescent on stderr, and stderr is attached to the crash report.
# NOTE(review): no `<exec_name>=` line is visible ahead of this command;
# confirm the rule name is present.
for i in /home/chronos/u-*/GCache/v2/*; do \
echo "===ls ${i}==="; \
ls -la "${i}"; \
echo "===ls ${i}/Logs==="; \
ls -la "${i}/Logs"; \
echo "===${i} drivefs==="; \
cat "${i}/Logs/drivefs.txt"; \
echo EOF; \
echo; \
done; \
echo "===messages (grep|tail)==="; \
grep -E -a ' (disks|drivefs)\[' /var/log/messages | tail -20; \
echo EOF
# The following rule is used for generating additional diagnostics when
# collection of user crashes fails. This output should not be too large
# as it is stored in memory. The output format specified for 'ps' is the
# same as with the "u" ("user-oriented") option, except it doesn't show
# the commands' arguments (i.e. "comm" instead of "command").
echo "===ps output==="; \
ps axw -o user,pid,%cpu,%mem,vsz,rss,tname,stat,start_time,bsdtime,comm | \
tail -c 25000; \
echo "===meminfo==="; \
cat /proc/meminfo
# This rule is similar to the crash_reporter-user-collection rule, except it is
# run for kernel errors reported through udev events.
echo "===i915/parameters==="; \
grep '' /sys/module/i915/parameters/* | \
sed -e 's!^/sys/module/i915/parameters/!!'; \
for dri in /sys/kernel/debug/dri/*; do \
echo "===$dri/i915_error_state==="; \
cat $dri/i915_error_state; \
echo "===$dri/i915_capabilities==="; \
cat $dri/i915_capabilities; \
echo "===$dri/i915_wa_registers==="; \
cat $dri/i915_wa_registers; \
done; \
echo EOF
# When trackpad driver cyapa detects some abnormal behavior, we collect
# additional logs from kernel messages.
/usr/sbin/ cyapa 30
# When trackpad/touchscreen driver atmel_mxt_ts detects some abnormal behavior,
# we collect additional logs from kernel messages.
/usr/sbin/ atmel_mxt_ts 30
# When touch device noise is detected, we collect relevant logs.
crash_reporter-udev-collection---TouchNoise=cat /var/log/touch_noise.log
# Periodically collect touch event log for debugging.
crash_reporter-udev-collection---TouchEvent=cat /var/log/touch_event.log
# Collect the last 50 lines of /var/log/messages and /var/log/net.log for
# intel wifi driver (iwlwifi) for debugging purpose.
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages; \
echo "===/var/log/net.log==="; \
tail -n 50 /var/log/net.log; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages; \
echo EOF
echo "===dmesg==="; \
dmesg | tail -n 50; \
echo EOF
# When the usb controller dies, collect all logs from the last 30 seconds
# that contain "xhci_hcd" or "usb".
/usr/sbin/ 'xhci_hcd\|usb' 30
# This rule is used only in Tast integration tests to test crash reporting
# via udev. Its command only prints "ok".
crash_reporter-udev-collection---tast_udev_crash_test=echo ok
# Grep crash_reporter logs along with processes and meminfo to send over for
# when crash_reporter itself crashes.
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages; \
echo EOF
# Dump the last 50 lines of the last two powerd log files -- if the job has
# already restarted, we want to see the end of the previous instance's logs.
# The loop must be closed by a final "done" with no trailing backslash so that
# the command ends here instead of absorbing the lines that follow.
# NOTE(review): no `<exec_name>=` line is visible ahead of this command;
# confirm the rule name is present.
for f in $(ls -1tr /var/log/power_manager/powerd.[0-9]* | tail -2); do \
echo "===$(basename "$f") (tail)==="; \
tail -50 "$f"; \
echo EOF; \
done
# If power_supply_info aborts (due to e.g. a bad battery), its failure message
# could end up in various places depending on which process was running it.
# Attach the end of powerd's log since it might've also logged the underlying
# problem.
echo "===powerd.LATEST (tail)==="; \
tail -50 /var/log/power_manager/powerd.LATEST; \
echo EOF
# powerd_setuid_helper gets run by powerd, so its stdout/stderr will be mixed in
# with powerd's stdout/stderr.
echo "===powerd.OUT (tail)==="; \
tail -50 /var/log/powerd.out; \
echo EOF
# For ARC service failures in journal, collect:
# * the last 50 lines of syslog
# * the last 50 lines up before the line indicating upstart process termination
# * the last 50 lines in /var/log/arc.log.
echo "=== syslog ==="; \
tail -n 50 /var/log/messages; \
echo "=== upstart ==="; \
grep -a -B50 ' process ([0-9]*) terminated with status ' /var/log/upstart.log\
| tail -50; \
echo "===/var/log/arc.log==="; \
tail -n 50 /var/log/arc.log; \
echo EOF
# For service failures in journal, collect:
# * the last 50 lines of syslog
# * the last 50 lines up until the line indicating upstart process termination
echo "=== syslog ==="; \
tail -n 50 /var/log/messages; \
echo "=== upstart ==="; \
grep -a -B50 ' process ([0-9]*) terminated with status ' /var/log/upstart.log\
| tail -50; \
echo EOF
# Collect the tail of the dmesg buffer.
echo "===dmesg==="; \
dmesg | tail -n 100; \
echo EOF
# Collect the dmesg buffer and lspci. Compress this in
# KernelWarningCollector. For the lspci command, 0280 refers to the network
# controller, and 0604 refers to the PCI bridge.
echo "===dmesg==="; \
dmesg; \
echo "===lspci==="; \
lspci -vvvv -d ::0280 | sed "/Device Serial Number/d"; \
lspci -vvvv -d ::0604 | sed "/Device Serial Number/d"; \
echo EOF
# Collect the smmu fault which is in the dmesg buffer. Compress it in
# KernelWarningCollector.
echo "===dmesg==="; \
dmesg | tail -n 100; \
echo "==smmu devices=="; \
ls -l /sys/class/iommu/*/devices; \
echo "==coreboot log=="; \
tail -50 /sys/firmware/log; \
echo EOF
# Collect the suspend log which is in the dmesg buffer. Compress it in
# KernelWarningCollector.
echo "===dmesg==="; \
dmesg; \
echo EOF
# Collect the suspend log which is in the dmesg buffer. Compress it in
# KernelWarningCollector.
echo "===dmesg==="; \
dmesg; \
echo EOF
# Collect ath10k log which is in the dmesg buffer. Compress it in
# KernelWarningCollector.
echo "===dmesg==="; \
dmesg; \
echo EOF
# Collect syslog and powerd logs for suspend failures.
# syslog is pretty verbose on suspend, so grab the latest 1000 lines.
echo "=== syslog ==="; \
croslog | grep -a -B1000 'Error writing to /sys/power/state: ' \
| tail -1000; \
echo "===powerd.LATEST (tail)==="; \
tail -50 /var/log/power_manager/powerd.LATEST; \
echo EOF
# If set_wifi_transmit_power fails, we need to collect some logs to be able to
# know the status of the WiFi module at the time of the request.
echo "===powerd.LATEST (tail)==="; \
tail -50 /var/log/power_manager/powerd.LATEST; \
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages; \
echo "===/var/log/net.log==="; \
tail -n 50 /var/log/net.log; \
echo EOF
# Collect 100 lines of the system log for generic failures that don't specify a
# different exec name.
echo "=== syslog ==="; \
croslog --lines=100; \
echo EOF
# The following rules are only for testing purposes.
# crash_log_test only prints "hello world".
crash_log_test=echo hello world
crash_log_recursion_tast_test=sleep 1 && \
# Collect the last 50 lines of /var/log/messages and grep for mosys for
# debugging purposes.
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages | grep mosys; \
echo EOF
# Collect the last 50 lines of /var/log/messages and grep for memd because it
# outputs human readable error messages when it panics.
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages | grep memd; \
echo EOF
# Collect the last 50 lines of /var/log/messages and grep for anomaly_detector
# because it outputs human readable error messages when it panics. Note that
# I'm deliberately not using journalctl -t anomaly_detector here; many of
# anomaly_detector's failures relate to not being able to parse the journal.
# The first tail (-n 1000) avoids reading excessive data from large log files.
echo "===/var/log/messages==="; \
tail -n 1000 /var/log/messages | grep anomaly.detector | tail -n 50; \
echo EOF
# Collect ec_info and ap version, which is useful information for debugging
# EC crashes.
echo "===ec_info==="; \
tail -c4096 /var/log/ec_info.txt; \
echo "===bios_info==="; \
tail -n 500 /var/log/bios_info.txt | grep fwid | tail -c4096; \
echo EOF
# Collect the first 100 lines of dmesg after kernel crash. This is useful for
# debugging when combined with the kernel crash logs from the last boot.
echo "===dmesg==="; \
dmesg | head -n 100; \
echo EOF
# Collect recent crash_sender lines from syslog, a listing of the crash spool
# directories, and the .meta file of every report that has a .processing
# marker file next to it.
# $dirs is expanded unquoted on purpose: it must split into separate paths and
# let the daemon-store glob expand.
# Every continued line ends with a space before the backslash so that the
# joined command does not depend on how the continuation lines are indented.
# NOTE(review): no `<exec_name>=` line is visible ahead of this command;
# confirm the rule name is present.
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages | grep crash_sender; \
dirs="/var/spool/crash /home/chronos/crash /run/daemon-store/crash/*/ \
/home/chronos/user/crash"; \
echo "===spool contents==="; \
ls -l $dirs; \
echo "===in-progress meta contents==="; \
for d in $dirs; do \
for p in $d/*.processing; do \
if [ -f "$p" ]; then \
echo "$p:"; \
cat "${p%.processing}.meta"; \
fi; \
done; \
done; \
echo EOF
# Collect auth failure log
echo "===auth-failure=="; \
tail -n 200 /var/log/tcsd/auth_failure.previous; \
echo EOF
# Collect last 100 lines of logs from mount-encrypted for debugging encrypted
# stateful mount failures.
echo "===mount-encrypted==="; \
tail -n 100 /run/mount_encrypted/mount-encrypted.log; \
echo EOF
# Collect ramoops for collecting early boot mount failures. There can be more
# than one record. Plus, for <linux-3.19, it's named "console-ramoops", but for
# newer versions, it's named "console-ramoops-#".
# Omit lines that could contain the pmsg-key.
# Under some conditions, the pmsg ramoops record can get exported as
# console ramoops.
echo "===ramoops==="; \
for ramoops in /sys/fs/pstore/console-ramoops*; do \
echo "===${ramoops}==="; \
tail -n 100 $ramoops | grep -v pmsg-key; \
done; \
echo EOF
# Collect logs related to a clobber-state call triggered by something other
# than a mount error.
# * tmpfiles.log - recreated on every boot to capture error messages from
# systemd-tmpfiles during early boot (init/pre-startup and init/startup).
# If this log contains errors they are a strong indicator of the reason
# of the clobber.
# * clobber.log - written when the stateful partition is wiped during
# chromeos_startup. This helps debug early boot issues from failures such
# as systemd-tmpfiles exiting with an error.
# Note that /mnt/stateful_partition/unencrypted/clobber.log exists before
# the clobber is performed, it is temporarily moved to /tmp/clobber.log
# during the clobber, and it is moved to /var/log/clobber.log after the
# clobber is completed.
# * dmesg - included so it is possible to identify when a localized
# filesystem corruption renders a file unusable. Check for an ext4 log
# entry that mentions what operation failed.
echo "===systemd-tmpfiles (tmpfiles.log)==="; \
head -n 500 /run/tmpfiles.log; \
echo "===clobber-state (clobber.log)==="; \
tail -n 500 /mnt/stateful_partition/unencrypted/clobber.log; \
echo "===dmesg (EXT4-fs only)==="; \
dmesg | grep EXT4-fs | tail -n 500; \
echo EOF
# Collect stateful partition dumpe2fs for debugging early boot mount failures.
# Only collect data on the superblock information and no detail about the block
# groups. In order to avoid increasing the permissions available to
# crash-reporter (/dev), run dumpe2fs from chromeos_startup and just log data
# from here.
echo "===dumpe2fs (stateful partition)==="; \
head -n 100 /run/dumpe2fs_stateful.log; \
echo EOF
# Collect encrypted stateful partition dumpe2fs for debugging early boot mount
# failures. On failing to mount the encrypted stateful partition,
# mount-encrypted dumps the superblock information into
# /run/dumpe2fs_encstateful.log. Note that this cannot be done out of band:
# mount-encrypted cleans up the dm-crypt mount on failure.
echo "===dumpe2fs (/dev/mapper/encstateful)==="; \
head -n 100 /run/mount_encrypted/dumpe2fs.log; \
echo EOF
# Collect logs from the shutdown umount failure state. On failing to shutdown
# cleanly, chromeos_shutdown logs current mounts, active processes and mount
# namespaces associated with them.
echo "===shutdown umount() failure logs==="; \
head -n 200 /run/shutdown_umount_failure.log; \
echo EOF
# Collect logs from 'mount-encrypted umount' to log failures during shutdown.
echo "===umount-encrypted==="; \
tail -n 100 /run/mount_encrypted/umount-encrypted.log; \
echo EOF
# Collect the last 100 lines of /var/log/messages and grep for bluetoothd for
# debugging purposes.
echo "===/var/log/messages==="; \
tail -n 100 /var/log/messages | grep bluetoothd; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages | grep vm_concierge; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages | grep vm_cicerone; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages | grep seneschal; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages | grep vmlog_forwarder; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 50 /var/log/messages | grep chunneld; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep cryptohome; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep tcsd; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep trunksd; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep -e trunksd -e tpm_manager; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep -e trunksd -e chaps -e cryptohome; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep bootlockbox; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep pca_agent; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep -e tpm_manager -e attestation; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | \
grep -e trunksd -e tpm_manager -e cryptohome; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | \
grep -e trunksd -e tpm_manager -e cryptohome; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | \
grep -e attestation -e tpm_manager -e cryptohome; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 200 /var/log/messages | grep -e trunksd -e attestation -e u2f; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 100 /var/log/messages | grep metrics_daemon; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 100 /var/log/messages | grep camera; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 100 /var/log/messages | grep camera; \
echo "===dmesg==="; \
dmesg | tail -n 100; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 100 /var/log/messages | grep cras_; \
echo EOF
# Fingerprint system early boot TPM seed transfer task.
# Attaches the last 100 lines of each bio_crypto_init log and each cros_fp log.
# The second loop must be closed by a final "done" with no trailing backslash
# so that the command ends here instead of absorbing the lines that follow.
# NOTE(review): no `<exec_name>=` line is visible ahead of this command;
# confirm the rule name is present.
for f in bio_crypto_init.LATEST bio_crypto_init.PREVIOUS; do \
echo "===$f (tail)==="; \
tail -n 100 /var/log/bio_crypto_init/$f; \
echo EOF; \
done; \
for f in cros_fp.log cros_fp.previous; do \
echo "===$f (tail)==="; \
tail -n 100 /var/log/$f; \
echo EOF; \
done
# Fingerprint system firmware updating task.
# Attaches the last 100 lines of each bio_crypto_init, bio_fw_updater and
# cros_fp log.
# The last loop must be closed by a final "done" with no trailing backslash so
# that the command ends here instead of absorbing the lines that follow.
# NOTE(review): no `<exec_name>=` line is visible ahead of this command;
# confirm the rule name is present.
for f in bio_crypto_init.LATEST bio_crypto_init.PREVIOUS; do \
echo "===$f (tail)==="; \
tail -n 100 /var/log/bio_crypto_init/$f; \
echo EOF; \
done; \
for f in bio_fw_updater.LATEST bio_fw_updater.PREVIOUS; do \
echo "===$f (tail)==="; \
tail -n 100 /var/log/biod/$f; \
echo EOF; \
done; \
for f in cros_fp.log cros_fp.previous; do \
echo "===$f (tail)==="; \
tail -n 100 /var/log/$f; \
echo EOF; \
done
# Fingerprint system daemon.
# Unfortunately, the fingerprint services are somewhat coupled together through
# communication with the FPMCU. So, errors that could occur in bio_crypto_init
# and bio_fw_updater can cause bad behavior in biod. For this reason, we must
# collect logs from the previous tasks to understand a biod failure.
# Keep in mind that the minidump itself could be about 50KB, the process_tree
# log could be about 2KB, and each additional biod related log could be nearly
# 10KB. The max combined upload size must be less than 1MB, or the additional
# logs will be omitted.
# The last loop must be closed by a final "done" with no trailing backslash so
# that the command ends here instead of absorbing the lines that follow.
# NOTE(review): no `<exec_name>=` line is visible ahead of this command;
# confirm the rule name is present.
for f in bio_crypto_init.LATEST bio_crypto_init.PREVIOUS; do \
echo "===$f (tail)==="; \
tail -n 100 /var/log/bio_crypto_init/$f; \
echo EOF; \
done; \
for f in bio_fw_updater.LATEST bio_fw_updater.PREVIOUS \
biod.LATEST biod.PREVIOUS; do \
echo "===$f (tail)==="; \
tail -n 100 /var/log/biod/$f; \
echo EOF; \
done; \
for f in cros_fp.log cros_fp.previous; do \
echo "===$f (tail)==="; \
tail -n 100 /var/log/$f; \
echo EOF; \
done
# hps daemon go/cros-hps
# powerd is the user of hps and may have more information
# TODO(b/227393528): Use croslog instead of grepping directly when feasible.
echo "=== syslog ==="; \
tail -n 1000 /var/log/messages | \
grep -E \
'(\bhpsd\b|\bHPS\b|\bHps\b|\bhps\b|Hps(Sense|Notify|Filter|Result))' | \
tail -n 100; \
echo "=== powerd.LATEST (tail) ==="; \
tail -100 /var/log/power_manager/powerd.LATEST; \
echo EOF
# feature daemon
echo "=== syslog entries ==="; \
croslog --identifier=featured --lines=1000 | tail -n 100; \
echo "=== syslog mentions ==="; \
croslog --grep=featured --lines=1000 | tail -n 100; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 20 /var/log/messages; \
echo "===/var/log/net.log==="; \
tail -n 80 /var/log/net.log | grep -v wifi; \
echo EOF
echo "===/var/log/messages==="; \
tail -n 100 /var/log/messages | grep "modemfwd\|kernel"; \
echo EOF
echo "===/var/log/net.log==="; \
tail -n 50 /var/log/net.log | grep "hermes"; \
echo EOF
# shill crashes may be due to interactions with other daemons, thus
# we do not grep for "shill" in the logs.
echo "===/var/log/messages==="; \
tail -n 20 /var/log/messages; \
echo "===/var/log/net.log==="; \
tail -n 80 /var/log/net.log; \
echo EOF