remote_access: Clean up output from remote_reboot

Previously, the remote_reboot function produced a large number of
warnings and errors from the ssh process. This change replaces it with
a quieter version that periodically (every 5-10s) echoes its status.

BUG=None
TEST=Few iterations with x86-alex via a test script and update_kernel.

Change-Id: Id606da64a7e55e902e4119e1dfc9f76cf27d5b5e
Reviewed-on: https://gerrit.chromium.org/gerrit/17139
Commit-Ready: Chris Wolfe <cwolfe@chromium.org>
Reviewed-by: Chris Wolfe <cwolfe@chromium.org>
Tested-by: Chris Wolfe <cwolfe@chromium.org>
This commit is contained in:
Chris Wolfe 2012-02-29 16:55:48 -05:00 committed by Gerrit
parent fbcc357e04
commit d91df7a1c2

View File

@ -105,57 +105,69 @@ function learn_arch() {
info "Target reports arch is ${FLAGS_arch}" info "Target reports arch is ${FLAGS_arch}"
} }
# Checks to see if pid $1 is running. # Checks whether a remote device has rebooted successfully.
function is_pid_running() { #
ps -p ${1} 2>&1 > /dev/null # This uses a rapidly-retried SSH connection, which will wait for at most
} # about ten seconds. If the network returns an error (e.g. host unreachable)
# the actual delay may be shorter.
# Wait function given an additional timeout argument. #
# $1 - pid to wait on. # Return values:
# $2 - timeout to wait for. # 0: The device has rebooted successfully
function wait_with_timeout() { # 1: The device has not yet rebooted
local pid=$1 # 255: Unable to communicate with the device
local timeout=$2 function _check_if_rebooted() {
local -r TIMEOUT_INC=1 (
local current_timeout=0 # In my tests SSH seems to be waiting rather longer than would be expected
while is_pid_running ${pid} && [ ${current_timeout} -lt ${timeout} ]; do # from these parameters. These values produce a ~10 second wait.
sleep ${TIMEOUT_INC} # (in a subshell to avoid clobbering the global settings)
current_timeout=$((current_timeout + TIMEOUT_INC)) SSH_CONNECT_SETTINGS="$(sed \
done -e 's/\(ConnectTimeout\)=[0-9]*/\1=2/' \
! is_pid_running ${pid} -e 's/\(ConnectionAttempts\)=[0-9]*/\1=2/' \
} <<<"${SSH_CONNECT_SETTINGS}")"
remote_sh_allow_changed_host_key -q -- '[ ! -e /tmp/awaiting_reboot ]'
# Checks to see if a machine has rebooted using the presence of a tmp file. )
function check_if_rebooted() {
local output_file="${TMP}/output"
while true; do
REMOTE_OUT=""
# This may fail while the machine is down so generate output and a
# boolean result to distinguish between down/timeout and real failure
! remote_sh_allow_changed_host_key \
"echo 0; [ -e /tmp/awaiting_reboot ] && echo '1'; true"
echo "${REMOTE_OUT}" > "${output_file}"
if grep -q "0" "${output_file}"; then
if grep -q "1" "${output_file}"; then
info "Not yet rebooted"
sleep .5
else
info "Rebooted and responding"
break
fi
fi
done
} }
# Triggers a reboot on a remote device and waits for it to complete.
#
# This function will not return until the SSH server on the remote device
# is available after the reboot.
#
function remote_reboot() { function remote_reboot() {
info "Rebooting." info "Rebooting ${FLAGS_remote}..."
remote_sh "touch /tmp/awaiting_reboot; reboot" remote_sh "touch /tmp/awaiting_reboot; reboot"
while true; do local start_time=${SECONDS}
check_if_rebooted &
local pid=$! # Wait for five seconds before we start polling
wait_with_timeout ${pid} 30 && break sleep 5
! kill -9 ${pid} 2> /dev/null
# Add a hard timeout of 5 minutes before giving up.
local timeout=300
local timeout_expiry=$(( start_time + timeout ))
while [ ${SECONDS} -lt ${timeout_expiry} ]; do
# Used to throttle the loop -- see step_remaining_time at the bottom.
local step_start_time=${SECONDS}
local status=0
_check_if_rebooted || status=$?
local elapsed=$(( SECONDS - start_time ))
case ${status} in
0) printf ' %4ds: reboot complete\n' ${elapsed} >&2 ; return 0 ;;
1) printf ' %4ds: device has not yet shut down\n' ${elapsed} >&2 ;;
255) printf ' %4ds: can not connect to device\n' ${elapsed} >&2 ;;
*) die " internal error" ;;
esac
# To keep the loop from spinning too fast, delay until this iteration has
# taken at least five seconds. When we are actively trying SSH connections
# this extra sleep should never be needed.
local step_remaining_time=$(( step_start_time + 5 - SECONDS ))
if [ ${step_remaining_time} -gt 0 ]; then
sleep ${step_remaining_time}
fi
done done
die "Reboot has not completed after ${timeout} seconds; giving up."
} }
# Called by clients before exiting. # Called by clients before exiting.