From fa3105efd6a42ab109bdabf907b18ea066fbd32a Mon Sep 17 00:00:00 2001
From: Kristoffer Dalby
Date: Mon, 4 May 2026 15:06:43 +0000
Subject: [PATCH] .github: capture diagnostics and run both macOS exit-status test steps

The macos-ssh-integrationtest job was failing in under a second with no
usable log output, and the job's two test steps were sequenced so a
failure in the first skipped the second. Make both deficiencies go away.

Preflight diagnostics step dumps uname, sw_vers, id, id runner, ssh -V,
which ssh, dscl . -read /Users/runner UserShell, /etc/ssh listing, sudo
capability, and PATH. This is the environment context needed to debug
every future failure on this runner, captured before the test binary
even runs.

PATH propagation through sudo now prepends an explicit list of system
directories (/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin) to the
inherited GitHub Actions PATH, which doesn't always include /usr/sbin
where macOS keeps utilities the incubator may invoke.

Both test steps now use continue-on-error: true with id-tagged outcomes,
and a final aggregator step fails the job if either was not 'success'.
Previously a failure in the OpenSSH step skipped the Go SSH step
entirely; we lost half the signal on every failed run.

Test output is tee'd to /tmp/openssh-exitcodes.log and
/tmp/gossh-exitcodes.log. The incubator log /tmp/tailscalessh.log is
sudo-copied (it's owned by root after sudo) and made readable. All three
are uploaded as a macos-ssh-integrationtest-logs artifact retained 14
days. actions/upload-artifact pinned to v7.0.0 to match the rest of the
repo.

The per-step -test.timeout is bumped from 3m to 5m to give the in-test
retry logic (up to 3 attempts) headroom without hitting the workflow
timeout before the final attempt completes.

Updates #18256

Signed-off-by: Kristoffer Dalby
---
 .github/workflows/ssh-integrationtest.yml | 90 ++++++++++++++++++++++-
 1 file changed, 88 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ssh-integrationtest.yml b/.github/workflows/ssh-integrationtest.yml
index 0c51648f4..2d0f0b115 100644
--- a/.github/workflows/ssh-integrationtest.yml
+++ b/.github/workflows/ssh-integrationtest.yml
@@ -43,13 +43,99 @@ jobs:
     steps:
       - name: Check out code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      # Capture environment context up front so future failures can be
+      # diagnosed from the job log alone, even if the test binary fails
+      # before producing useful output of its own.
+      - name: macOS preflight diagnostics
+        run: |
+          set +e  # best-effort: one failing probe must not abort the remaining ones
+          echo "=== uname -a ==="
+          uname -a
+          echo "=== sw_vers ==="
+          sw_vers
+          echo "=== id ==="
+          id
+          echo "=== id runner ==="
+          id runner
+          echo "=== ssh -V ==="
+          ssh -V
+          echo "=== which ssh ==="
+          which ssh
+          echo "=== dscl . -read /Users/runner UserShell ==="
+          dscl . -read /Users/runner UserShell
+          echo "=== /etc/ssh listing ==="
+          ls -la /etc/ssh/ || true
+          echo "=== sudo -n true ==="
+          sudo -n true && echo "passwordless sudo OK" || echo "passwordless sudo NOT available"
+          echo "=== PATH ==="
+          echo "$PATH"
+
       - name: Build test binaries
         run: |
           ./tool/go test -tags integrationtest -c ./ssh/tailssh -o /tmp/tailssh.test
           ./tool/go build -o /tmp/tailscaled ./cmd/tailscaled
+          ls -l /tmp/tailssh.test /tmp/tailscaled
+
+      # The two test steps below are wired with continue-on-error so a
+      # failure in one still lets the other run. The aggregator step at
+      # the end fails the job if either failed. We need both: the Go SSH
+      # client test exercises the wire-level frame ordering, and the
+      # OpenSSH client test exercises the actual binary that users of
+      # #18256 are running.
+
       - name: Run macOS OpenSSH exit status integration tests
+        id: openssh
+        continue-on-error: true
         run: |
-          sudo env "PATH=$PATH" TAILSCALED_PATH=/tmp/tailscaled TS_SSH_INTEGRATION_TEST_USER=runner /tmp/tailssh.test -test.v -test.timeout=3m -test.run '^TestOpenSSHExitCodes$'
+          set -o pipefail  # tee must not mask the test binary's exit status
+          sudo env \
+            "PATH=/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH" \
+            TAILSCALED_PATH=/tmp/tailscaled \
+            TS_SSH_INTEGRATION_TEST_USER=runner \
+            /tmp/tailssh.test -test.v -test.timeout=5m -test.run '^TestOpenSSHExitCodes$' \
+            2>&1 | tee /tmp/openssh-exitcodes.log
+
       - name: Run macOS Go SSH exit status integration tests
+        id: gossh
+        continue-on-error: true
         run: |
-          sudo env "PATH=$PATH" TAILSCALED_PATH=/tmp/tailscaled TS_SSH_INTEGRATION_TEST_USER=runner /tmp/tailssh.test -test.v -test.timeout=3m -test.run '^TestIntegrationExitCodes$'
+          set -o pipefail  # tee must not mask the test binary's exit status
+          sudo env \
+            "PATH=/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:$PATH" \
+            TAILSCALED_PATH=/tmp/tailscaled \
+            TS_SSH_INTEGRATION_TEST_USER=runner \
+            /tmp/tailssh.test -test.v -test.timeout=5m -test.run '^TestIntegrationExitCodes$' \
+            2>&1 | tee /tmp/gossh-exitcodes.log
+
+      - name: Collect incubator log
+        if: always()
+        run: |
+          if [ -f /tmp/tailscalessh.log ]; then
+            sudo cp /tmp/tailscalessh.log /tmp/tailscalessh.log.copy || true
+            sudo chmod a+r /tmp/tailscalessh.log.copy || true
+          else
+            echo "no incubator log produced" > /tmp/tailscalessh.log.copy
+          fi
+
+      - name: Upload diagnostic artifacts
+        if: always()
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: macos-ssh-integrationtest-logs
+          path: |
+            /tmp/openssh-exitcodes.log
+            /tmp/gossh-exitcodes.log
+            /tmp/tailscalessh.log.copy
+          if-no-files-found: warn
+          retention-days: 14
+
+      - name: Aggregate test result
+        run: |
+          openssh="${{ steps.openssh.outcome }}"
+          gossh="${{ steps.gossh.outcome }}"
+          echo "OpenSSH test outcome: $openssh"
+          echo "Go SSH test outcome: $gossh"
+          if [ "$openssh" != "success" ] || [ "$gossh" != "success" ]; then
+            exit 1
+          fi