mirror of
https://github.com/flatcar/scripts.git
synced 2025-09-22 22:21:10 +02:00
remote_access: Clean up output from remote_reboot
Previously the remote_reboot function produced a large number of warnings and errors from the ssh process. This replaces it with a quieter version that periodically (every 5-10s) echoes its status. BUG=None TEST=Few iterations with x86-alex via a test script and update_kernel. Change-Id: Id606da64a7e55e902e4119e1dfc9f76cf27d5b5e Reviewed-on: https://gerrit.chromium.org/gerrit/17139 Commit-Ready: Chris Wolfe <cwolfe@chromium.org> Reviewed-by: Chris Wolfe <cwolfe@chromium.org> Tested-by: Chris Wolfe <cwolfe@chromium.org>
This commit is contained in:
parent
fbcc357e04
commit
d91df7a1c2
104
remote_access.sh
104
remote_access.sh
@ -105,57 +105,69 @@ function learn_arch() {
|
|||||||
info "Target reports arch is ${FLAGS_arch}"
|
info "Target reports arch is ${FLAGS_arch}"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Checks to see if pid $1 is running.
|
# Checks whether a remote device has rebooted successfully.
|
||||||
function is_pid_running() {
|
#
|
||||||
ps -p ${1} 2>&1 > /dev/null
|
# This uses a rapidly-retried SSH connection, which will wait for at most
|
||||||
}
|
# about ten seconds. If the network returns an error (e.g. host unreachable)
|
||||||
|
# the actual delay may be shorter.
|
||||||
# Wait function given an additional timeout argument.
|
#
|
||||||
# $1 - pid to wait on.
|
# Return values:
|
||||||
# $2 - timeout to wait for.
|
# 0: The device has rebooted successfully
|
||||||
function wait_with_timeout() {
|
# 1: The device has not yet rebooted
|
||||||
local pid=$1
|
# 255: Unable to communicate with the device
|
||||||
local timeout=$2
|
function _check_if_rebooted() {
|
||||||
local -r TIMEOUT_INC=1
|
(
|
||||||
local current_timeout=0
|
# In my tests SSH seems to be waiting rather longer than would be expected
|
||||||
while is_pid_running ${pid} && [ ${current_timeout} -lt ${timeout} ]; do
|
# from these parameters. These values produce a ~10 second wait.
|
||||||
sleep ${TIMEOUT_INC}
|
# (in a subshell to avoid clobbering the global settings)
|
||||||
current_timeout=$((current_timeout + TIMEOUT_INC))
|
SSH_CONNECT_SETTINGS="$(sed \
|
||||||
done
|
-e 's/\(ConnectTimeout\)=[0-9]*/\1=2/' \
|
||||||
! is_pid_running ${pid}
|
-e 's/\(ConnectionAttempts\)=[0-9]*/\1=2/' \
|
||||||
}
|
<<<"${SSH_CONNECT_SETTINGS}")"
|
||||||
|
remote_sh_allow_changed_host_key -q -- '[ ! -e /tmp/awaiting_reboot ]'
|
||||||
# Checks to see if a machine has rebooted using the presence of a tmp file.
|
)
|
||||||
function check_if_rebooted() {
|
|
||||||
local output_file="${TMP}/output"
|
|
||||||
while true; do
|
|
||||||
REMOTE_OUT=""
|
|
||||||
# This may fail while the machine is down so generate output and a
|
|
||||||
# boolean result to distinguish between down/timeout and real failure
|
|
||||||
! remote_sh_allow_changed_host_key \
|
|
||||||
"echo 0; [ -e /tmp/awaiting_reboot ] && echo '1'; true"
|
|
||||||
echo "${REMOTE_OUT}" > "${output_file}"
|
|
||||||
if grep -q "0" "${output_file}"; then
|
|
||||||
if grep -q "1" "${output_file}"; then
|
|
||||||
info "Not yet rebooted"
|
|
||||||
sleep .5
|
|
||||||
else
|
|
||||||
info "Rebooted and responding"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Triggers a reboot on a remote device and waits for it to complete.
|
||||||
|
#
|
||||||
|
# This function will not return until the SSH server on the remote device
|
||||||
|
# is available after the reboot.
|
||||||
|
#
|
||||||
function remote_reboot() {
|
function remote_reboot() {
|
||||||
info "Rebooting."
|
info "Rebooting ${FLAGS_remote}..."
|
||||||
remote_sh "touch /tmp/awaiting_reboot; reboot"
|
remote_sh "touch /tmp/awaiting_reboot; reboot"
|
||||||
while true; do
|
local start_time=${SECONDS}
|
||||||
check_if_rebooted &
|
|
||||||
local pid=$!
|
# Wait for five seconds before we start polling
|
||||||
wait_with_timeout ${pid} 30 && break
|
sleep 5
|
||||||
! kill -9 ${pid} 2> /dev/null
|
|
||||||
|
# Add a hard timeout of 5 minutes before giving up.
|
||||||
|
local timeout=300
|
||||||
|
local timeout_expiry=$(( start_time + timeout ))
|
||||||
|
while [ ${SECONDS} -lt ${timeout_expiry} ]; do
|
||||||
|
# Used to throttle the loop -- see step_remaining_time at the bottom.
|
||||||
|
local step_start_time=${SECONDS}
|
||||||
|
|
||||||
|
local status=0
|
||||||
|
_check_if_rebooted || status=$?
|
||||||
|
|
||||||
|
local elapsed=$(( SECONDS - start_time ))
|
||||||
|
case ${status} in
|
||||||
|
0) printf ' %4ds: reboot complete\n' ${elapsed} >&2 ; return 0 ;;
|
||||||
|
1) printf ' %4ds: device has not yet shut down\n' ${elapsed} >&2 ;;
|
||||||
|
255) printf ' %4ds: can not connect to device\n' ${elapsed} >&2 ;;
|
||||||
|
*) die " internal error" ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# To keep the loop from spinning too fast, delay until it has taken at
|
||||||
|
# least five seconds. When we are actively trying SSH connections this
|
||||||
|
# should never happen.
|
||||||
|
local step_remaining_time=$(( step_start_time + 5 - SECONDS ))
|
||||||
|
if [ ${step_remaining_time} -gt 0 ]; then
|
||||||
|
sleep ${step_remaining_time}
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
|
die "Reboot has not completed after ${timeout} seconds; giving up."
|
||||||
}
|
}
|
||||||
|
|
||||||
# Called by clients before exiting.
|
# Called by clients before exiting.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user