Script to generate post mortem of all crashes on device

BUG=4887

TEST=Tested that run_remote_tests and image_to_live still work.  Ran the script on a machine with no crashes, as well as on a machine with a slew of powerd CHECK(false)'s.

Change-Id: Iffb6571d30d99d876f41972f92a7149a716035ee

Review URL: http://codereview.chromium.org/3276002
This commit is contained in:
Ken Mixter 2010-08-31 12:07:11 -07:00
parent e13960240b
commit cc4f1dd14e
4 changed files with 193 additions and 35 deletions

171
cros_show_stacks Executable file
View File

@ -0,0 +1,171 @@
#!/bin/bash
# Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Script to generate stackdumps from a machine or dmp files.

# Load common constants.  This should be the first executable line.
# The path to common.sh should be relative to your script's location.
# $0 is quoted so a checkout path containing whitespace still resolves.
. "$(dirname "$0")/common.sh"
. "$(dirname "$0")/remote_access.sh"

# Forward the command line as separate words ("$@") rather than as one
# re-split string ($*), so arguments containing whitespace stay intact.
restart_in_chroot_if_needed "$@"

# Breakpad tools used to inspect minidumps.
MINIDUMP_DUMP=/usr/bin/minidump_dump
MINIDUMP_STACKWALK=/usr/bin/minidump_stackwalk

# Set to 1 by main() once remote access is initialized; read by cleanup().
USING_REMOTE=0

get_default_board

DEFINE_string board "${DEFAULT_BOARD}" \
  "The board for which you are building autotest"
DEFINE_string breakpad_root "" \
  "Path to root of breakpad symbols if pre-existing symbols should be used"
DEFINE_boolean clean ${FLAGS_FALSE} \
  "Remove crash reports from remote system after showing stacks"
# Prints the command-line synopsis on stdout and exits with status 1.
function usage() {
  # $0 is quoted so a script path containing whitespace is still reported
  # correctly.  The trailing space after "gather" is part of the original
  # message and is kept as is.
  printf '%s\n' \
    "usage: $(basename "$0") [--remote=<IP>] [dump...]" \
    "Specify either a remote IP of a ChromeOS device to gather " \
    "all crash reports from, or list crash reports"
  exit 1
}
# Tear-down helper installed as a trap by main(): calls
# cleanup_remote_access when USING_REMOTE is 1, then deletes the scratch
# directory named by TMP.
function cleanup() {
  if [ ${USING_REMOTE} -eq 1 ]; then
    cleanup_remote_access
  fi
  rm -rf "${TMP}"
}
# Echoes kind of crash (minidump or kcrash).
# Args:
#   $1 - path or file name of a crash report.
# Outputs:
#   The text after the last "." of $1, with "dmp" reported as "minidump".
function get_kind() {
  local kind="${1##*.}"
  case "${kind}" in
    dmp) kind="minidump" ;;
  esac
  # Quoted so the result is never word-split or glob-expanded.
  printf '%s\n' "${kind}"
}
# Generate symbols for the given module list.
# Args:
#   $1 - file with a "module" per line.  A module is the full target's
#        path to a DSO or executable that was loaded during a crash.
# Globals:
#   FLAGS_board (read), SCRIPTS_DIR (read)
# Returns:
#   0 when there is nothing to generate, otherwise the exit status of
#   cros_generate_breakpad_symbols.
function generate_symbols() {
  local modules_file="$1"
  local -a modules=()
  local any_missing=0
  local module text_file debug_file

  # The list only exists if at least one minidump was inspected.
  [ -e "${modules_file}" ] || return 0

  # Read line by line so a module path containing whitespace stays one entry.
  while IFS= read -r module; do
    [ -n "${module}" ] || continue
    text_file="/build/${FLAGS_board}/${module}"
    debug_file="/build/${FLAGS_board}/usr/lib/debug/${module}.debug"
    if [ -f "${text_file}" ] && [ -f "${debug_file}" ]; then
      modules+=("${text_file}")
    else
      # Print the heading once, before the first missing module.
      if [ ${any_missing} -eq 0 ]; then
        warn "Some modules are missing debug information:"
        any_missing=1
      fi
      warn "* ${text_file}"
    fi
  done < <(sort -u "${modules_file}")

  if [ ${#modules[@]} -gt 0 ]; then
    info "Generating breakpad symbols for ${#modules[@]} modules"
    "${SCRIPTS_DIR}/cros_generate_breakpad_symbols" \
      --board="${FLAGS_board}" "${modules[@]}"
  fi
}
# Entry point.  With --remote, copies every crash report found on the
# device into a scratch directory; otherwise uses the crash files named on
# the command line.  Then prints a stack for each minidump and the raw
# contents of every other report.
# Globals:
#   TMP (written), USING_REMOTE (written), FLAGS_ARGV (rewritten in remote
#   mode), FLAGS_board / FLAGS_breakpad_root (read, possibly filled in)
function main() {
  FLAGS "$@" || usage

  local basename
  basename=$(basename "$0")
  # TMP is intentionally global: cleanup() deletes it.
  TMP=$(mktemp -d "/tmp/${basename}.XXXX") \
    || die "Could not create a temporary directory."
  # Clean up once, on exit.  INT/TERM are converted into a real exit;
  # trapping them with cleanup directly would delete TMP and then let the
  # script carry on with the next command.
  trap cleanup EXIT
  trap 'exit 1' INT TERM

  local crash dump

  if [ -n "${FLAGS_remote}" ]; then
    remote_access_init
    USING_REMOTE=1
    learn_board

    # File spec of all interesting crashes.  /home/chronos... is
    # listed separately from /mnt/stateful_partition/home/chronos/...
    # because the former may be a mount point for the cryptohome.
    # This allows us to get crashes from the currently logged in
    # user as well as from non-logged in users at once.  We remove
    # duplicate crashes (in case cryptohome is not mounted) below.
    local remote_crash_dirs="/var/spool/crash /home/chronos/user/crash"
    remote_crash_dirs="${remote_crash_dirs} /mnt/stateful_partition/home/chronos/user/crash"

    # Spell out both extensions so the listing does not depend on the
    # remote shell performing brace expansion.
    local remote_crash_dir
    local remote_crash_patterns=""
    for remote_crash_dir in ${remote_crash_dirs}; do
      remote_crash_patterns="${remote_crash_patterns} ${remote_crash_dir}/*.dmp"
      remote_crash_patterns="${remote_crash_patterns} ${remote_crash_dir}/*.kcrash"
    done
    # stderr of the listing is discarded (e.g. directories without crashes).
    remote_sh "ls -1 ${remote_crash_patterns}" 2> /dev/null
    local crashes="${REMOTE_OUT}"

    # Remove duplicates: keep the first path seen for each file name.
    # Names are compared exactly, not as regex/substring matches.
    local unique_crashes=""
    local crash_count=0
    local crash_short
    local seen_names=" "
    for crash in ${crashes}; do
      crash_short=$(basename "${crash}")
      case "${seen_names}" in
        *" ${crash_short} "*) continue ;;
      esac
      seen_names="${seen_names}${crash_short} "
      unique_crashes="${unique_crashes} ${crash}"
      crash_count=$((crash_count + 1))
    done
    if [ ${crash_count} -eq 0 ]; then
      info "No crashes found on device."
      exit 0
    fi

    info "Copying back ${crash_count} crashes."
    local filesfrom="${TMP}/filesfrom"
    # In remote mode the dumps to show are the local copies, so any dump
    # arguments given on the command line are replaced.
    FLAGS_ARGV=""
    for crash in ${unique_crashes}; do
      echo "${crash}" >> "${filesfrom}"
      FLAGS_ARGV="${FLAGS_ARGV} '${TMP}/$(basename "${crash}")'"
    done
    if remote_rsync_from "${filesfrom}" "${TMP}"; then
      if [ ${FLAGS_clean} -eq ${FLAGS_TRUE} ]; then
        remote_sh "rm -rf ${remote_crash_dirs}"
      fi
    else
      # Never wipe the device when the reports may not have been copied.
      warn "Could not copy every crash back; leaving crash reports on the device."
    fi
  else
    [ -n "${FLAGS_ARGV}" ] || usage
    [ -n "${FLAGS_board}" ] || die "--board is required."
  fi

  # FLAGS_ARGV is a string of single-quoted words; split it once and strip
  # the quotes so both passes below iterate over the same list.
  local -a dumps=()
  for dump in ${FLAGS_ARGV}; do
    dumps+=("$(remove_quotes "${dump}")")
  done

  # Start from an existing, empty list so generate_symbols always has a
  # file to read, even when no minidump is among the dumps.
  local modules_file="${TMP}/modules"
  : > "${modules_file}"
  for dump in "${dumps[@]}"; do
    if [ "$(get_kind "${dump}")" = "minidump" ]; then
      # Find all DSOs and executables listed in lines like:
      #   (code_file) = "/usr/lib/mylib.so"
      ${MINIDUMP_DUMP} "${dump}" 2>/dev/null \
        | grep code_file \
        | sed 's/.*= "\(.*\)"/\1/' \
        >> "${modules_file}"
    fi
  done

  # Without pre-existing symbols, generate them for the modules found above.
  if [ -z "${FLAGS_breakpad_root}" ]; then
    generate_symbols "${modules_file}"
    FLAGS_breakpad_root=/build/${FLAGS_board}/usr/lib/debug/breakpad
  fi

  for dump in "${dumps[@]}"; do
    if [ "$(get_kind "${dump}")" = "minidump" ]; then
      info "Dumping stack for $(basename "${dump}") with ${FLAGS_breakpad_root}:"
      ${MINIDUMP_STACKWALK} "${dump}" "${FLAGS_breakpad_root}" 2> /dev/null
    else
      info "Dumping kcrash $(basename "${dump}"):"
      cat "${dump}"
    fi
    echo ""
  done
}

main "$@"

View File

@ -69,7 +69,7 @@ function copy_stateful_update {
local dev_dir="$(dirname $0)/../platform/dev" local dev_dir="$(dirname $0)/../platform/dev"
# Copy over update script and run update. # Copy over update script and run update.
remote_cp "$dev_dir/stateful_update" "/tmp" remote_cp_to "$dev_dir/stateful_update" "/tmp"
remote_sh "/tmp/stateful_update" remote_sh "/tmp/stateful_update"
} }

View File

@ -14,12 +14,20 @@ DEFINE_integer ssh_port 22 \
"SSH port of the remote machine running Chromium OS instance" "SSH port of the remote machine running Chromium OS instance"
# Copies $1 to $2 on remote host # Copies $1 to $2 on remote host
function remote_cp() { function remote_cp_to() {
REMOTE_OUT=$(scp -o StrictHostKeyChecking=no -o \ REMOTE_OUT=$(scp -o StrictHostKeyChecking=no -o \
UserKnownHostsFile=$TMP_KNOWN_HOSTS $1 root@$FLAGS_remote:$2) UserKnownHostsFile=$TMP_KNOWN_HOSTS $1 root@$FLAGS_remote:$2)
return ${PIPESTATUS[0]} return ${PIPESTATUS[0]}
} }
# Copies a list of remote files specified in file $1 to local location
# $2.  Directory paths in $1 are collapsed into $2.
# Globals:
#   FLAGS_ssh_port, FLAGS_remote, TMP_KNOWN_HOSTS (all read)
# Returns:
#   The exit status of rsync.
function remote_rsync_from() {
  # Use the same port as remote_sh so a non-default --ssh_port is honored.
  local ssh_cmd="ssh -p ${FLAGS_ssh_port:-22} -o StrictHostKeyChecking=no"
  ssh_cmd="${ssh_cmd} -o UserKnownHostsFile=${TMP_KNOWN_HOSTS}"
  # --no-R turns off the relative-path mode so the files land directly in $2.
  rsync -e "${ssh_cmd}" --no-R \
    --files-from="$1" "root@${FLAGS_remote}:/" "$2"
}
function remote_sh() { function remote_sh() {
REMOTE_OUT=$(ssh -p ${FLAGS_ssh_port} -o StrictHostKeyChecking=no -o \ REMOTE_OUT=$(ssh -p ${FLAGS_ssh_port} -o StrictHostKeyChecking=no -o \
UserKnownHostsFile=$TMP_KNOWN_HOSTS root@$FLAGS_remote "$@") UserKnownHostsFile=$TMP_KNOWN_HOSTS root@$FLAGS_remote "$@")
@ -48,6 +56,18 @@ function set_up_remote_access() {
echo "Connection OK" echo "Connection OK"
} }
# Fill in FLAGS_board from the target's /etc/lsb-release when it is not
# already set.  Exits with status 1 if no board name can be read.
function learn_board() {
  if [ -n "${FLAGS_board}" ]; then
    return 0
  fi
  remote_sh grep CHROMEOS_RELEASE_BOARD /etc/lsb-release
  # The matching line has the form KEY=value; keep the value.
  FLAGS_board=$(cut -d '=' -f 2 <<< "${REMOTE_OUT}")
  if [ -n "${FLAGS_board}" ]; then
    info "Target reports board is ${FLAGS_board}"
  else
    error "Board required"
    exit 1
  fi
}
function cleanup_remote_access() { function cleanup_remote_access() {
# Call this function from the exit trap of the main script. # Call this function from the exit trap of the main script.
# Iff we started ssh-agent, be nice and clean it up. # Iff we started ssh-agent, be nice and clean it up.

View File

@ -63,25 +63,6 @@ function cleanup() {
cleanup_remote_access cleanup_remote_access
} }
# Returns an error if the test_result_file has text which indicates
# the test was not run successfully.
# Arguments:
#   $1 - file name of the autotest status file to check for success
# Returns:
#   0 if the file contains GOOD and none of BAD, ERROR or FAIL; 1 otherwise
function is_successful_test() {
  local file="$1"
  # To be successful, must not have BAD, ERROR or FAIL in the file.
  # grep -E replaces the obsolescent egrep.
  if grep -E -q "(BAD|ERROR|FAIL)" "${file}"; then
    return 1
  fi
  # To be successful, must have GOOD in the file.
  if ! grep -q GOOD "${file}"; then
    return 1
  fi
  return 0
}
# Adds attributes to all tests run # Adds attributes to all tests run
# Arguments: # Arguments:
# $1 - results directory # $1 - results directory
@ -103,20 +84,6 @@ function add_test_attribute() {
} }
# Fill in FLAGS_board from the target's /etc/lsb-release when it is not
# already set, falling back to check_board if nothing could be read, and
# report the result on stdout.
function learn_board() {
  [[ -z "${FLAGS_board}" ]] || return 0
  remote_sh grep CHROMEOS_RELEASE_BOARD /etc/lsb-release
  # The matching line has the form KEY=value; keep the value.
  FLAGS_board=$(cut -d= -f2 <<< "${REMOTE_OUT}")
  [[ -n "${FLAGS_board}" ]] || check_board
  echo "Target reports board is ${FLAGS_board}"
}
# Determine if a control is for a client or server test. Echos # Determine if a control is for a client or server test. Echos
# either "server" or "client". # either "server" or "client".
# Arguments: # Arguments: