blob: 0a1b9516cfc289a1b20b78a9b59675512787b289 [file] [log] [blame]
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# This file is parsed by chromeos::KeyValueStore. It has the format:
#
# <exec_name>=<shell command>\n
#
# When a collector processes a crash, the corresponding command is executed and
# its standard output and standard error are attached to the crash report.
#
# The <exec_name> corresponds to exec_name used in the various collectors.
# For OS programs that crash (e.g. coredump), this will be the program's
# basename. For example, if /usr/bin/powerd crashes, it will be "powerd".
# For other collectors, the exec_name might have other values, so you'll have
# to consult the collector itself. For example, kernel warnings might be one
# of "kernel-warning", or "kernel-wifi-warning", or "kernel-suspend-warning".
#
# Shell commands may be split across multiple lines using trailing backslashes.
#
# The contents of the log will be stripped for some potentially sensitive info
# (see CrashCollector::StripSensitiveData for a description of what gets
# stripped), but try to avoid collection of potential PII at all times.
#
# Use caution in modifying this file. Only run common Unix commands here, as
# these commands will be run when a crash has recently occurred and we should
# avoid running anything that might cause another crash. Similarly, these
# commands block notification of the crash to parent processes, so commands
# should execute quickly.
# Attach the tail (at most 50000 bytes) of the concatenation of the five most
# recently modified files in /var/log/update_engine, oldest first. cat's stdin
# is redirected from /dev/null so that, if the directory is missing or empty
# and cat therefore receives no file operands, it outputs nothing instead of
# reading whatever standard input this command inherited.
update_engine=cat $(ls -1tr /var/log/update_engine | tail -n 5 | \
  sed 's.^./var/log/update_engine/.') </dev/null | tail -c 50000
# Attach the authpolicy log when authpolicyd crashes: a header line followed
# by the last 50000 bytes of /var/log/authpolicy.log. Authpolicyd is the daemon
# that handles Active Directory (AD) device management. It is disabled if AD
# management is not used, e.g. for ordinary cloud managed Chromebooks.
authpolicyd=echo "===authpolicyd output==="; \
  tail -c 50000 /var/log/authpolicy.log
# The cros_installer output is logged into the update engine log file, so
# attach the tail (at most 50000 bytes) of the concatenation of the five most
# recently modified files in /var/log/update_engine, oldest first. cat's stdin
# is redirected from /dev/null so that, if the directory is missing or empty
# and cat therefore receives no file operands, it outputs nothing instead of
# reading whatever standard input this command inherited.
cros_installer=cat $(ls -1tr /var/log/update_engine | tail -n 5 | \
  sed 's.^./var/log/update_engine/.') </dev/null | tail -c 50000
# Dump the last 20 lines of the two most recently modified chrome_<digit>* log
# files under /var/log/chrome and under the /home/chronos/u-*/log user
# directories, followed by the last 20 lines of /var/log/messages whose third
# field starts with "session_manager[".
# Errors from the user-directory listing are discarded (2>/dev/null). $f is
# quoted where it is passed to tail so the path is always a single argument
# and is never re-globbed.
chrome=\
  for f in \
    $(ls -1rt /var/log/chrome/chrome_[0-9]* | tail -n 2) \
    $(ls -1rt /home/chronos/u-*/log/chrome_[0-9]* 2>/dev/null | tail -n 2); \
  do \
    echo "===$f (tail)==="; \
    tail -n 20 "$f"; \
    echo EOF; \
    echo; \
  done; \
  echo "===session_manager (tail)==="; \
  awk '$3 ~ "^session_manager\\[" { print }' /var/log/messages | tail -n 20; \
  echo EOF
# Dump the last DriveFS instance log for every account in every profile,
# the last syslog messages related to either drivefs or cros-disks,
# and DriveFS' data directory structure.
# For each /home/chronos/u-*/GCache/v2/* entry this lists the entry and its
# Logs subdirectory and prints Logs/drivefs.txt in full; it then prints the
# last 20 matching lines of /var/log/messages.
# grep -E is used instead of egrep: egrep is obsolescent and recent GNU grep
# releases print a warning on stderr, which would be attached to the report.
# NOTE(review): the pattern requires a space directly before "disks[" or
# "drivefs[" -- confirm that this matches the tag cros-disks logs under.
drivefs=\
  for i in /home/chronos/u-*/GCache/v2/*; do \
    echo "===ls ${i}==="; \
    ls -la "${i}"; \
    echo "===ls ${i}/Logs==="; \
    ls -la "${i}/Logs"; \
    echo "===${i} drivefs==="; \
    cat "${i}/Logs/drivefs.txt"; \
    echo EOF; \
    echo; \
  done; \
  echo "===messages (grep|tail)==="; \
  grep -E -a ' (disks|drivefs)\[' /var/log/messages | tail -n 20; \
  echo EOF
# Additional diagnostics generated when collection of a user crash fails.
# The output is stored in memory, so it should not be too large: the process
# list is capped at its last 25000 bytes before /proc/meminfo is appended.
# The 'ps' output format is the same as with the "u" ("user-oriented") option,
# except that it shows "comm" instead of "command", i.e. the commands'
# arguments are not included.
crash_reporter-user-collection=echo "===ps output==="; \
  ps axw -o user,pid,%cpu,%mem,vsz,rss,tname,stat,start_time,bsdtime,comm | \
    tail -c 25000; \
  echo "===meminfo==="; \
  cat /proc/meminfo
# Like the crash_reporter-user-collection rule, this gathers extra
# diagnostics, except it is run for kernel errors reported through udev
# events. It prints every i915 module parameter as "<name>:<value>" (the
# /sys/module/i915/parameters/ prefix is stripped by sed), then, for each
# entry under /sys/kernel/debug/dri, the i915_error_state, i915_capabilities
# and i915_wa_registers files.
# The paths handed to cat are quoted so each is always passed as a single,
# unglobbed argument.
crash_reporter-udev-collection-change-card0-drm=\
  echo "===i915/parameters==="; \
  grep '' /sys/module/i915/parameters/* | \
    sed -e 's!^/sys/module/i915/parameters/!!'; \
  for dri in /sys/kernel/debug/dri/*; do \
    echo "===$dri/i915_error_state==="; \
    cat "$dri/i915_error_state"; \
    echo "===$dri/i915_capabilities==="; \
    cat "$dri/i915_capabilities"; \
    echo "===$dri/i915_wa_registers==="; \
    cat "$dri/i915_wa_registers"; \
  done; \
  echo EOF
# When trackpad driver cyapa detects some abnormal behavior, we collect
# additional logs from kernel messages.
# kernel_log_collector.sh is invoked with the keyword "cyapa" and the value 30.
# NOTE(review): 30 is presumably a look-back window in seconds (the usb rule
# in this file describes the same argument that way) -- confirm against the
# script.
crash_reporter-udev-collection-change--i2c-cyapa=\
/usr/sbin/kernel_log_collector.sh cyapa 30
# When trackpad/touchscreen driver atmel_mxt_ts detects some abnormal behavior,
# we collect additional logs from kernel messages.
# Note: the first arg to kernel_log_collector.sh is a keyword that
# must match the logging_UdevCrash autotest.
# NOTE(review): the second arg (30) is presumably a look-back window in
# seconds (the usb rule in this file describes the same argument that way) --
# confirm against the script.
crash_reporter-udev-collection-change--i2c-atmel_mxt_ts=\
/usr/sbin/kernel_log_collector.sh atmel_mxt_ts 30
# When touch device noise is detected, we collect the relevant log
# (crosbug.com/p/16788). The whole of /var/log/touch_noise.log is attached;
# no size limit is applied by this command.
crash_reporter-udev-collection---TouchNoise=cat /var/log/touch_noise.log
# Periodically collect the touch event log for debugging
# (crosbug.com/p/17244). The whole of /var/log/touch_event.log is attached;
# no size limit is applied by this command.
crash_reporter-udev-collection---TouchEvent=cat /var/log/touch_event.log
# For debugging the Intel wifi driver (iwlwifi): attach the last 50 lines of
# /var/log/messages and the last 50 lines of /var/log/net.log, each under its
# own header, followed by an EOF marker.
crash_reporter-udev-collection-devcoredump-iwlwifi=\
  echo "===/var/log/messages==="; tail -n 50 /var/log/messages; \
  echo "===/var/log/net.log==="; tail -n 50 /var/log/net.log; \
  echo EOF
# When the usb controller dies, collect all logs from the last 30 seconds
# that contain "xhci_hcd" or "usb".
# The first argument is single-quoted so the shell passes the backslash and
# pipe through to kernel_log_collector.sh unchanged.
# NOTE(review): '\|' is presumably consumed as a basic-regex alternation by
# the script -- confirm against kernel_log_collector.sh.
crash_reporter-udev-collection-dead--usb=\
/usr/sbin/kernel_log_collector.sh 'xhci_hcd\|usb' 30
# This rule is used only in Tast integration tests to test crash reporting
# via udev. The command only prints "ok", so that is the entire log content
# this rule contributes.
crash_reporter-udev-collection---tast_udev_crash_test=echo ok
# Logs to send over for when crash_reporter itself crashes: the last 50 lines
# of /var/log/messages under a header, followed by an EOF marker. Only the
# syslog tail is gathered by this rule; it does not collect a process list or
# meminfo.
crash_reporter_failure=echo "===/var/log/messages==="; \
  tail -n 50 /var/log/messages; \
  echo EOF
# Dump the last 50 lines of the last two powerd log files -- if the job has
# already restarted, we want to see the end of the previous instance's logs.
# Files are chosen as the two most recently modified matches of
# /var/log/power_manager/powerd.[0-9]*; each is printed under a header with
# its basename and followed by an EOF marker.
# $f is quoted where it is passed to basename and tail so the path is always a
# single, unglobbed argument.
powerd=\
  for f in $(ls -1tr /var/log/power_manager/powerd.[0-9]* | tail -n 2); do \
    echo "===$(basename "$f") (tail)==="; \
    tail -n 50 "$f"; \
    echo EOF; \
  done
# If power_supply_info aborts (due to e.g. a bad battery), its failure message
# could end up in various places depending on which process was running it.
# Attach the last 50 lines of powerd's current log
# (/var/log/power_manager/powerd.LATEST), since powerd might also have logged
# the underlying problem.
power_supply_info=echo "===powerd.LATEST (tail)==="; \
  tail -n 50 /var/log/power_manager/powerd.LATEST; \
  echo EOF
# powerd_setuid_helper gets run by powerd, so its stdout/stderr will be mixed in
# with powerd's stdout/stderr. Attach the last 50 lines of /var/log/powerd.out.
powerd_setuid_helper=echo "===powerd.OUT (tail)==="; \
  tail -n 50 /var/log/powerd.out; \
  echo EOF
# For ARC service failures in the journal: collect the last 50 lines up to and
# including the most recent line indicating upstart process termination
# (" process (<pid>) terminated with status "), then the last 50 lines of
# /var/log/arc.log.
arc-service-failure=echo "=== syslog ==="; \
  journalctl | \
    grep -a -B50 ' process ([0-9]*) terminated with status ' | \
    tail -n 50; \
  echo "===/var/log/arc.log==="; \
  tail -n 50 /var/log/arc.log; \
  echo EOF
# For service failures in the journal: collect the last 50 lines up to and
# including the most recent line indicating upstart process termination
# (" process (<pid>) terminated with status ").
service-failure=echo "=== syslog ==="; \
  journalctl | \
    grep -a -B50 ' process ([0-9]*) terminated with status ' | \
    tail -n 50; \
  echo EOF
# Collect the tail of the dmesg buffer: its last 100 lines, under a header
# and followed by an EOF marker.
kernel-warning=echo "===dmesg==="; \
  dmesg | tail -n 100; \
  echo EOF
# Collect the whole dmesg buffer plus verbose lspci output. This is compressed
# in KernelWarningCollector. lspci is restricted by class: 0280 refers to the
# network controller and 0604 refers to the PCI bridge. Lines containing
# "Device Serial Number" are deleted from the lspci output.
kernel-wifi-warning=echo "===dmesg==="; \
  dmesg; \
  echo "===lspci==="; \
  lspci -vvvv -d ::0280 | \
    sed "/Device Serial Number/d"; \
  lspci -vvvv -d ::0604 | \
    sed "/Device Serial Number/d"; \
  echo EOF
# Collect the suspend log, which is in the dmesg buffer, by dumping the whole
# buffer. It is compressed in KernelWarningCollector.
kernel-suspend-warning=echo "===dmesg==="; \
  dmesg; \
  echo EOF
# Collect syslog and powerd logs for suspend failures.
# syslog is pretty verbose on suspend, so grab the latest 1000 journal lines
# up to and including the most recent "Error writing to /sys/power/state: "
# line, then the last 50 lines of powerd's current log.
suspend-failure=echo "=== syslog ==="; \
  journalctl | \
    grep -a -B1000 'Error writing to /sys/power/state: ' | \
    tail -n 1000; \
  echo "===powerd.LATEST (tail)==="; \
  tail -n 50 /var/log/power_manager/powerd.LATEST; \
  echo EOF
# Collect the last 100 lines of the journal for generic failures that don't
# specify a different exec name.
generic-failure=echo "=== syslog ==="; \
  journalctl | tail -n 100; \
  echo EOF
# The following rules are only for testing purposes.
# crash_log_test produces the fixed output "hello world".
crash_log_test=echo hello world
# Test-only rule: waits one second, then runs the autotest helper below; the
# helper runs only if sleep succeeds (&&).
# NOTE(review): judging by its name, the helper presumably exercises a crash
# that occurs while logs are being collected -- confirm against the autotest.
crash_log_recursion_test=sleep 1 && \
/usr/local/autotest/tests/crash_log_recursion_test
# Test-only rule: waits one second, then runs the Tast helper below; the
# helper runs only if sleep succeeds (&&).
# NOTE(review): judging by its name, the helper presumably exercises a crash
# that occurs while logs are being collected -- confirm against the Tast test.
crash_log_recursion_tast_test=sleep 1 && \
/usr/local/libexec/tast/helpers/local/cros/crash_log_recursion_tast_test
# For debugging mosys: take the last 50 lines of /var/log/messages and keep
# only those that mention mosys.
mosys=echo "===/var/log/messages==="; \
  tail -n 50 /var/log/messages | grep mosys; \
  echo EOF
# Take the last 50 lines of /var/log/messages and keep only those that mention
# memd, because memd outputs human readable error messages when it panics.
memd=echo "===/var/log/messages==="; \
  tail -n 50 /var/log/messages | grep memd; \
  echo EOF
# Take the last 50 lines of /var/log/messages and keep only those that mention
# anomaly_detector, because it outputs human readable error messages when it
# panics. journalctl -t anomaly_detector is deliberately not used here; many
# of anomaly_detector's failures relate to not being able to parse the
# journal.
anomaly_detector=echo "===/var/log/messages==="; \
  tail -n 50 /var/log/messages | grep anomaly_detector; \
  echo EOF