From 09c324b563a55696113e4600bf033c87830e367e Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick <bradfitz@tailscale.com>
Date: Sun, 5 Apr 2026 01:06:55 +0000
Subject: [PATCH] ssh/tailssh: speed up SSH integration tests

Parallelize the SSH integration tests across OS targets and reduce
per-container overhead:

- CI: use GitHub Actions matrix strategy to run all 4 OS containers
  (ubuntu:focal, ubuntu:jammy, ubuntu:noble, alpine:latest) in parallel
  instead of sequentially (~4x wall-clock improvement)

- Makefile: run docker builds in parallel for local dev too

- Dockerfile: consolidate ~20 separate RUN commands into 5 (one per
  test phase), eliminating Docker layer overhead. Combine test binary
  invocations where no state mutation is needed between them. Fix a bug
  where TestDoDropPrivileges was silently not being run (was passed as a
  second positional arg to -test.run instead of using regex alternation).

- TestMain: replace tail -F + 2s sleep with synchronous log read,
  eliminating 2s overhead per test binary invocation. Set debugTest once
  in TestMain instead of redundantly in each test function.

- session.read(): close channel on EOF so non-shell tests return
  immediately instead of waiting for the 1s silence timeout.

Updates #19244

Change-Id: I2cc8588964fbce0dd7b654fb94e7ff33440b8584
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
---
 .github/workflows/ssh-integrationtest.yml | 24 +++++-
 Makefile                                  | 10 ++-
 ssh/tailssh/tailssh_integration_test.go   | 88 +++++++--------------
 ssh/tailssh/testcontainers/Dockerfile     | 94 ++++++++++++-----------
 4 files changed, 101 insertions(+), 115 deletions(-)

diff --git a/.github/workflows/ssh-integrationtest.yml b/.github/workflows/ssh-integrationtest.yml
index afe2dd2f7..84432cd72 100644
--- a/.github/workflows/ssh-integrationtest.yml
+++ b/.github/workflows/ssh-integrationtest.yml
@@ -1,5 +1,5 @@
-# Run the ssh integration tests with `make sshintegrationtest`.
-# These tests can also be running locally.
+# Run the ssh integration tests in various Docker containers.
+# These tests can also be run locally via `make sshintegrationtest`.
 name: "ssh-integrationtest"
 
 concurrency:
@@ -15,9 +15,25 @@ on:
 jobs:
   ssh-integrationtest:
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - base: "ubuntu:focal"
+            tag: "ssh-ubuntu-focal"
+          - base: "ubuntu:jammy"
+            tag: "ssh-ubuntu-jammy"
+          - base: "ubuntu:noble"
+            tag: "ssh-ubuntu-noble"
+          - base: "alpine:latest"
+            tag: "ssh-alpine-latest"
     steps:
       - name: Check out code
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-      - name: Run SSH integration tests
+      - name: Build test binaries
         run: |
-          make sshintegrationtest
\ No newline at end of file
+          GOOS=linux GOARCH=amd64 CGO_ENABLED=0 ./tool/go test -tags integrationtest -c ./ssh/tailssh -o ssh/tailssh/testcontainers/tailssh.test
+          GOOS=linux GOARCH=amd64 CGO_ENABLED=0 ./tool/go build -o ssh/tailssh/testcontainers/tailscaled ./cmd/tailscaled
+      - name: Run SSH integration tests (${{ matrix.base }})
+        run: |
+          docker build --build-arg="BASE=${{ matrix.base }}" -t "${{ matrix.tag }}" ssh/tailssh/testcontainers
diff --git a/Makefile b/Makefile
index b78ef0469..1f469b887 100644
--- a/Makefile
+++ b/Makefile
@@ -137,10 +137,12 @@ publishdevproxy: check-image-repo ## Build and publish k8s-proxy image to locati
 sshintegrationtest: ## Run the SSH integration tests in various Docker containers
 	@GOOS=linux GOARCH=amd64 CGO_ENABLED=0 ./tool/go test -tags integrationtest -c ./ssh/tailssh -o ssh/tailssh/testcontainers/tailssh.test && \
 	GOOS=linux GOARCH=amd64 CGO_ENABLED=0 ./tool/go build -o ssh/tailssh/testcontainers/tailscaled ./cmd/tailscaled && \
-	echo "Testing on ubuntu:focal" && docker build --build-arg="BASE=ubuntu:focal" -t ssh-ubuntu-focal ssh/tailssh/testcontainers && \
-	echo "Testing on ubuntu:jammy" && docker build --build-arg="BASE=ubuntu:jammy" -t ssh-ubuntu-jammy ssh/tailssh/testcontainers && \
-	echo "Testing on ubuntu:noble" && docker build --build-arg="BASE=ubuntu:noble" -t ssh-ubuntu-noble ssh/tailssh/testcontainers && \
-	echo "Testing on alpine:latest" && docker build --build-arg="BASE=alpine:latest" -t ssh-alpine-latest ssh/tailssh/testcontainers
+	echo "Testing on ubuntu:focal, ubuntu:jammy, ubuntu:noble, alpine:latest (in parallel)" && \
+	docker build --build-arg="BASE=ubuntu:focal" -t ssh-ubuntu-focal ssh/tailssh/testcontainers & \
+	docker build --build-arg="BASE=ubuntu:jammy" -t ssh-ubuntu-jammy ssh/tailssh/testcontainers & \
+	docker build --build-arg="BASE=ubuntu:noble" -t ssh-ubuntu-noble ssh/tailssh/testcontainers & \
+	docker build --build-arg="BASE=alpine:latest" -t ssh-alpine-latest ssh/tailssh/testcontainers & \
+	wait
 
 .PHONY: generate
 generate: ## Generate code
diff --git a/ssh/tailssh/tailssh_integration_test.go b/ssh/tailssh/tailssh_integration_test.go
index 1eb79b37e..a69c89c9f 100644
--- a/ssh/tailssh/tailssh_integration_test.go
+++ b/ssh/tailssh/tailssh_integration_test.go
@@ -6,7 +6,6 @@
 package tailssh
 
 import (
-	"bufio"
 	"bytes"
 	"context"
 	"crypto/rand"
@@ -60,56 +59,33 @@ import (
 var testVarRoot string
 
 func TestMain(m *testing.M) {
+	debugTest.Store(true)
+
+	// Create our log file.
+	if err := os.WriteFile("/tmp/tailscalessh.log", nil, 0666); err != nil {
+		log.Fatal(err)
+	}
+
+	// Create a temp directory for SSH host keys.
 	var err error
 	testVarRoot, err = os.MkdirTemp("", "tailssh-test-var")
 	if err != nil {
 		log.Fatal(err)
 	}
-	defer os.RemoveAll(testVarRoot)
 
-	// Create our log file.
-	file, err := os.OpenFile("/tmp/tailscalessh.log", os.O_CREATE|os.O_WRONLY, 0666)
-	if err != nil {
-		log.Fatal(err)
-	}
-	file.Close()
+	code := m.Run()
 
-	// Tail our log file.
-	cmd := exec.Command("tail", "-F", "/tmp/tailscalessh.log")
+	os.RemoveAll(testVarRoot)
 
-	r, err := cmd.StdoutPipe()
-	if err != nil {
-		return
+	// Print any log output from the incubator subprocesses.
+	if b, err := os.ReadFile("/tmp/tailscalessh.log"); err == nil && len(b) > 0 {
+		log.Print(string(b))
 	}
 
-	scanner := bufio.NewScanner(r)
-	go func() {
-		for scanner.Scan() {
-			line := scanner.Text()
-			log.Println(line)
-		}
-	}()
-
-	err = cmd.Start()
-	if err != nil {
-		return
-	}
-	defer func() {
-		// tail -f has a default sleep interval of 1 second, so it takes a
-		// moment for it to finish reading our log file after we've terminated.
-		// So, wait a bit to let it catch up.
-		time.Sleep(2 * time.Second)
-	}()
-
-	m.Run()
+	os.Exit(code)
 }
 
 func TestIntegrationSSH(t *testing.T) {
-	debugTest.Store(true)
-	t.Cleanup(func() {
-		debugTest.Store(false)
-	})
-
 	homeDir := "/home/testuser"
 	if runtime.GOOS == "darwin" {
 		homeDir = "/Users/testuser"
@@ -215,11 +191,6 @@ func TestIntegrationSSH(t *testing.T) {
 }
 
 func TestIntegrationSFTP(t *testing.T) {
-	debugTest.Store(true)
-	t.Cleanup(func() {
-		debugTest.Store(false)
-	})
-
 	for _, forceV1Behavior := range []bool{false, true} {
 		name := "v2"
 		if forceV1Behavior {
@@ -276,11 +247,6 @@ func TestIntegrationSFTP(t *testing.T) {
 }
 
 func TestIntegrationSCP(t *testing.T) {
-	debugTest.Store(true)
-	t.Cleanup(func() {
-		debugTest.Store(false)
-	})
-
 	for _, forceV1Behavior := range []bool{false, true} {
 		name := "v2"
 		if forceV1Behavior {
@@ -334,11 +300,6 @@ func TestIntegrationSCP(t *testing.T) {
 }
 
 func TestSSHAgentForwarding(t *testing.T) {
-	debugTest.Store(true)
-	t.Cleanup(func() {
-		debugTest.Store(false)
-	})
-
 	// Create a client SSH key
 	tmpDir, err := os.MkdirTemp("", "")
 	if err != nil {
@@ -428,11 +389,6 @@ func TestSSHAgentForwarding(t *testing.T) {
 // request 'none' auth and instead immediately authenticate with a public key
 // or password.
 func TestIntegrationParamiko(t *testing.T) {
-	debugTest.Store(true)
-	t.Cleanup(func() {
-		debugTest.Store(false)
-	})
-
 	addr := testServer(t, "testuser", true, false)
 	host, port, err := net.SplitHostPort(addr)
 	if err != nil {
@@ -509,26 +465,34 @@ func (s *session) run(t *testing.T, cmdString string, shell bool) string {
 func (s *session) read() string {
 	ch := make(chan []byte)
 	go func() {
+		defer close(ch)
 		for {
 			b := make([]byte, 1)
 			n, err := s.stdout.Read(b)
 			if n > 0 {
 				ch <- b
 			}
-			if err == io.EOF {
+			if err != nil {
 				return
 			}
 		}
 	}()
 
 	// Read first byte in blocking fashion.
-	_got := <-ch
+	b, ok := <-ch
+	if !ok {
+		return ""
+	}
+	_got := b
 
-	// Read subsequent bytes in non-blocking fashion.
+	// Read subsequent bytes until EOF or silence.
 readLoop:
 	for {
 		select {
-		case b := <-ch:
+		case b, ok := <-ch:
+			if !ok {
+				break readLoop
+			}
 			_got = append(_got, b...)
 		case <-time.After(1 * time.Second):
 			break readLoop
diff --git a/ssh/tailssh/testcontainers/Dockerfile b/ssh/tailssh/testcontainers/Dockerfile
index 4ef1c1eb0..6d915b8af 100644
--- a/ssh/tailssh/testcontainers/Dockerfile
+++ b/ssh/tailssh/testcontainers/Dockerfile
@@ -28,60 +28,64 @@ COPY tailssh.test .
 
 RUN chmod 755 tailscaled
 
-RUN echo "First run tests normally."
-RUN eval `ssh-agent -s` && TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestSSHAgentForwarding
-RUN if echo "$BASE" | grep "ubuntu:"; then rm -Rf /home/testuser; fi
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegrationSFTP
-RUN if echo "$BASE" | grep "ubuntu:"; then rm -Rf /home/testuser; fi
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegrationSCP
-RUN if echo "$BASE" | grep "ubuntu:"; then rm -Rf /home/testuser; fi
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegrationSSH
-RUN if echo "$BASE" | grep "ubuntu:"; then rm -Rf /home/testuser; fi
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegrationParamiko
+# Run tests normally.
+# On Ubuntu, delete testuser's home directory between tests to verify
+# that PAM's pam_mkhomedir recreates it each time.
+RUN set -e && \
+    eval $(ssh-agent -s) && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestSSHAgentForwarding && \
+    if echo "$BASE" | grep -q "ubuntu:"; then rm -Rf /home/testuser; fi && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestIntegrationSFTP && \
+    if echo "$BASE" | grep -q "ubuntu:"; then rm -Rf /home/testuser; fi && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestIntegrationSCP && \
+    if echo "$BASE" | grep -q "ubuntu:"; then rm -Rf /home/testuser; fi && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestIntegrationSSH && \
+    if echo "$BASE" | grep -q "ubuntu:"; then rm -Rf /home/testuser; fi && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestIntegrationParamiko
 
-RUN echo "Then run tests as non-root user testuser and make sure tests still pass."
-RUN touch /tmp/tailscalessh.log
-RUN chown testuser:groupone /tmp/tailscalessh.log
-RUN TAILSCALED_PATH=`pwd`tailscaled eval `su -m testuser -c ssh-agent -s` && su -m testuser -c "./tailssh.test -test.v -test.run TestSSHAgentForwarding"
-RUN TAILSCALED_PATH=`pwd`tailscaled su -m testuser -c "./tailssh.test -test.v -test.run TestIntegration TestDoDropPrivileges"
-RUN echo "Also, deny everyone access to the user's home directory and make sure non file-related tests still pass."
-RUN mkdir -p /home/testuser && chown testuser:groupone /home/testuser && chmod 0000 /home/testuser
-RUN TAILSCALED_PATH=`pwd`tailscaled SKIP_FILE_OPS=1 su -m testuser -c "./tailssh.test -test.v -test.run TestIntegrationSSH"
-RUN chmod 0755 /home/testuser
-RUN chown root:root /tmp/tailscalessh.log
+# Run tests as non-root user testuser and make sure tests still pass.
+RUN set -e && \
+    touch /tmp/tailscalessh.log && \
+    chown testuser:groupone /tmp/tailscalessh.log && \
+    export TAILSCALED_PATH=$(pwd)/tailscaled && \
+    eval $(su -m testuser -c "ssh-agent -s") && \
+    su -m testuser -c "./tailssh.test -test.v -test.run 'TestSSHAgentForwarding|TestIntegration|TestDoDropPrivileges'" && \
+    echo "Also, deny everyone access to the user's home directory and make sure non file-related tests still pass." && \
+    mkdir -p /home/testuser && chown testuser:groupone /home/testuser && chmod 0000 /home/testuser && \
+    SKIP_FILE_OPS=1 su -m testuser -c "./tailssh.test -test.v -test.run TestIntegrationSSH" && \
+    chmod 0755 /home/testuser && \
+    chown root:root /tmp/tailscalessh.log
 
-RUN if echo "$BASE" | grep "ubuntu:"; then \
-    echo "Then run tests in a system that's pretending to be SELinux in enforcing mode" && \
-    # Remove execute permissions for /usr/bin/login so that it fails.
+# On Ubuntu, run tests pretending to be SELinux in enforcing mode.
+RUN if echo "$BASE" | grep -q "ubuntu:"; then \
+    set -e && \
+    echo "Run tests in a system that's pretending to be SELinux in enforcing mode" && \
     mv /usr/bin/login /tmp/login_orig && \
-    # Use nonsense for /usr/bin/login so that it fails.
-    # It's not the same failure mode as in SELinux, but failure is good enough for test.
     echo "adsfasdfasdf" > /usr/bin/login && \
     chmod 755 /usr/bin/login && \
-    # Simulate getenforce command
     printf "#!/bin/bash\necho 'Enforcing'" > /usr/bin/getenforce && \
     chmod 755 /usr/bin/getenforce && \
-    eval `ssh-agent -s` && TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestSSHAgentForwarding && \
-    TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegration && \
+    eval $(ssh-agent -s) && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run 'TestSSHAgentForwarding|TestIntegration' && \
     mv /tmp/login_orig /usr/bin/login && \
     rm /usr/bin/getenforce \
     ; fi
 
-RUN echo "Then remove the login command and make sure tests still pass."
-RUN rm `which login`
-RUN eval `ssh-agent -s` && TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestSSHAgentForwarding
-RUN if echo "$BASE" | grep "ubuntu:"; then rm -Rf /home/testuser; fi
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegrationSFTP
-RUN if echo "$BASE" | grep "ubuntu:"; then rm -Rf /home/testuser; fi
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegrationSCP
-RUN if echo "$BASE" | grep "ubuntu:"; then rm -Rf /home/testuser; fi
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegrationSSH
+# Remove the login command and make sure tests still pass.
+RUN set -e && \
+    rm $(which login) && \
+    eval $(ssh-agent -s) && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestSSHAgentForwarding && \
+    if echo "$BASE" | grep -q "ubuntu:"; then rm -Rf /home/testuser; fi && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestIntegrationSFTP && \
+    if echo "$BASE" | grep -q "ubuntu:"; then rm -Rf /home/testuser; fi && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestIntegrationSCP && \
+    if echo "$BASE" | grep -q "ubuntu:"; then rm -Rf /home/testuser; fi && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run TestIntegrationSSH
 
-RUN echo "Then remove the su command and make sure tests still pass."
-RUN chown root:root /tmp/tailscalessh.log
-RUN rm `which su`
-RUN eval `ssh-agent -s` && TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestSSHAgentForwarding
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestIntegration
-
-RUN echo "Test doDropPrivileges"
-RUN TAILSCALED_PATH=`pwd`tailscaled ./tailssh.test -test.v -test.run TestDoDropPrivileges
+# Remove the su command and make sure tests still pass.
+RUN set -e && \
+    chown root:root /tmp/tailscalessh.log && \
+    rm $(which su) && \
+    eval $(ssh-agent -s) && \
+    TAILSCALED_PATH=$(pwd)/tailscaled ./tailssh.test -test.v -test.run 'TestSSHAgentForwarding|TestIntegration|TestDoDropPrivileges'