mirror of
https://github.com/flatcar/scripts.git
synced 2025-09-22 06:01:41 +02:00
remote_access: Clean up output from remote_reboot
Previously the remote_reboot function produced a large number of warnings and errors from the ssh process. This replaces it with a quieter version that periodically (every 5-10s) echos its status. BUG=None TEST=Few iterations with x86-alex via a test script and update_kernel. Change-Id: Id606da64a7e55e902e4119e1dfc9f76cf27d5b5e Reviewed-on: https://gerrit.chromium.org/gerrit/17139 Commit-Ready: Chris Wolfe <cwolfe@chromium.org> Reviewed-by: Chris Wolfe <cwolfe@chromium.org> Tested-by: Chris Wolfe <cwolfe@chromium.org>
This commit is contained in:
parent
fbcc357e04
commit
d91df7a1c2
104
remote_access.sh
104
remote_access.sh
@ -105,57 +105,69 @@ function learn_arch() {
|
||||
info "Target reports arch is ${FLAGS_arch}"
|
||||
}
|
||||
|
||||
# Checks to see if pid $1 is running.
|
||||
function is_pid_running() {
|
||||
ps -p ${1} 2>&1 > /dev/null
|
||||
}
|
||||
|
||||
# Wait function given an additional timeout argument.
|
||||
# $1 - pid to wait on.
|
||||
# $2 - timeout to wait for.
|
||||
function wait_with_timeout() {
|
||||
local pid=$1
|
||||
local timeout=$2
|
||||
local -r TIMEOUT_INC=1
|
||||
local current_timeout=0
|
||||
while is_pid_running ${pid} && [ ${current_timeout} -lt ${timeout} ]; do
|
||||
sleep ${TIMEOUT_INC}
|
||||
current_timeout=$((current_timeout + TIMEOUT_INC))
|
||||
done
|
||||
! is_pid_running ${pid}
|
||||
}
|
||||
|
||||
# Checks to see if a machine has rebooted using the presence of a tmp file.
|
||||
function check_if_rebooted() {
|
||||
local output_file="${TMP}/output"
|
||||
while true; do
|
||||
REMOTE_OUT=""
|
||||
# This may fail while the machine is down so generate output and a
|
||||
# boolean result to distinguish between down/timeout and real failure
|
||||
! remote_sh_allow_changed_host_key \
|
||||
"echo 0; [ -e /tmp/awaiting_reboot ] && echo '1'; true"
|
||||
echo "${REMOTE_OUT}" > "${output_file}"
|
||||
if grep -q "0" "${output_file}"; then
|
||||
if grep -q "1" "${output_file}"; then
|
||||
info "Not yet rebooted"
|
||||
sleep .5
|
||||
else
|
||||
info "Rebooted and responding"
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
# Checks whether a remote device has rebooted successfully.
|
||||
#
|
||||
# This uses a rapidly-retried SSH connection, which will wait for at most
|
||||
# about ten seconds. If the network returns an error (e.g. host unreachable)
|
||||
# the actual delay may be shorter.
|
||||
#
|
||||
# Return values:
|
||||
# 0: The device has rebooted successfully
|
||||
# 1: The device has not yet rebooted
|
||||
# 255: Unable to communicate with the device
|
||||
function _check_if_rebooted() {
|
||||
(
|
||||
# In my tests SSH seems to be waiting rather longer than would be expected
|
||||
# from these parameters. These values produce a ~10 second wait.
|
||||
# (in a subshell to avoid clobbering the global settings)
|
||||
SSH_CONNECT_SETTINGS="$(sed \
|
||||
-e 's/\(ConnectTimeout\)=[0-9]*/\1=2/' \
|
||||
-e 's/\(ConnectionAttempts\)=[0-9]*/\1=2/' \
|
||||
<<<"${SSH_CONNECT_SETTINGS}")"
|
||||
remote_sh_allow_changed_host_key -q -- '[ ! -e /tmp/awaiting_reboot ]'
|
||||
)
|
||||
}
|
||||
|
||||
# Triggers a reboot on a remote device and waits for it to complete.
|
||||
#
|
||||
# This function will not return until the SSH server on the remote device
|
||||
# is available after the reboot.
|
||||
#
|
||||
function remote_reboot() {
|
||||
info "Rebooting."
|
||||
info "Rebooting ${FLAGS_remote}..."
|
||||
remote_sh "touch /tmp/awaiting_reboot; reboot"
|
||||
while true; do
|
||||
check_if_rebooted &
|
||||
local pid=$!
|
||||
wait_with_timeout ${pid} 30 && break
|
||||
! kill -9 ${pid} 2> /dev/null
|
||||
local start_time=${SECONDS}
|
||||
|
||||
# Wait for five seconds before we start polling
|
||||
sleep 5
|
||||
|
||||
# Add a hard timeout of 5 minutes before giving up.
|
||||
local timeout=300
|
||||
local timeout_expiry=$(( start_time + timeout ))
|
||||
while [ ${SECONDS} -lt ${timeout_expiry} ]; do
|
||||
# Used to throttle the loop -- see step_remaining_time at the bottom.
|
||||
local step_start_time=${SECONDS}
|
||||
|
||||
local status=0
|
||||
_check_if_rebooted || status=$?
|
||||
|
||||
local elapsed=$(( SECONDS - start_time ))
|
||||
case ${status} in
|
||||
0) printf ' %4ds: reboot complete\n' ${elapsed} >&2 ; return 0 ;;
|
||||
1) printf ' %4ds: device has not yet shut down\n' ${elapsed} >&2 ;;
|
||||
255) printf ' %4ds: can not connect to device\n' ${elapsed} >&2 ;;
|
||||
*) die " internal error" ;;
|
||||
esac
|
||||
|
||||
# To keep the loop from spinning too fast, delay until it has taken at
|
||||
# least five seconds. When we are actively trying SSH connections this
|
||||
# should never happen.
|
||||
local step_remaining_time=$(( step_start_time + 5 - SECONDS ))
|
||||
if [ ${step_remaining_time} -gt 0 ]; then
|
||||
sleep ${step_remaining_time}
|
||||
fi
|
||||
done
|
||||
die "Reboot has not completed after ${timeout} seconds; giving up."
|
||||
}
|
||||
|
||||
# Called by clients before exiting.
|
||||
|
Loading…
x
Reference in New Issue
Block a user