From 50b8cfbde2fc548a6c5b33b3c7021997ca14a0d9 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 13 Apr 2026 20:31:35 +0000 Subject: [PATCH] wgengine/netstack: fix data race on in-flight connection test globals The maxInFlightConnectionAttemptsForTest and maxInFlightConnectionAttemptsPerClientForTest globals were plain ints read by background gVisor TCP handler goroutines (via wrapTCPProtocolHandler) and written by tstest.Replace cleanup in TestTCPForwardLimits_PerClient. When a gVisor goroutine outlived the test cleanup window, the race detector caught the unsynchronized access. The race-prone code was introduced in c5abbcd4b4d8 (2024-02-26, "wgengine/netstack: add a per-client limit for in-flight TCP forwards") which added both the plain int globals and the TestTCPForwardLimits_PerClient test that writes them via tstest.Replace. It is not obvious why this has only recently started being detected as a data race; likely some combination of gVisor version bumps, Go toolchain scheduler changes, and additional TCP-injecting subtests (e.g. 03461ea7f, 2026-01-30) increased goroutine churn enough to hit the window. Change both globals to atomic.Int32 and replace tstest.Replace (which does non-atomic *target = old on cleanup) with explicit Store/Cleanup pairs. Fixes #19118 Change-Id: Id26ba6fbfb2e4ade319976db80af8e16c7c8778e Signed-off-by: Brad Fitzpatrick --- wgengine/netstack/netstack.go | 16 +++++++++------- wgengine/netstack/netstack_test.go | 6 ++++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go index 4da89e364..9f65b50c0 100644 --- a/wgengine/netstack/netstack.go +++ b/wgengine/netstack/netstack.go @@ -64,10 +64,12 @@ import ( const debugPackets = false // If non-zero, these override the values returned from the corresponding -// functions, below. +// functions, below. They are accessed atomically because background +// goroutines in the gVisor TCP stack read them while test cleanup +// goroutines may be restoring them concurrently. var ( - maxInFlightConnectionAttemptsForTest int - maxInFlightConnectionAttemptsPerClientForTest int + maxInFlightConnectionAttemptsForTest atomic.Int32 + maxInFlightConnectionAttemptsPerClientForTest atomic.Int32 ) // maxInFlightConnectionAttempts returns the global number of in-flight @@ -80,8 +82,8 @@ var ( // connection, so we want to ensure that we don't allow an unbounded number of // connections. func maxInFlightConnectionAttempts() int { - if n := maxInFlightConnectionAttemptsForTest; n > 0 { - return n + if n := maxInFlightConnectionAttemptsForTest.Load(); n > 0 { + return int(n) } if version.IsMobile() { @@ -106,8 +108,8 @@ func maxInFlightConnectionAttempts() int { // maxInFlightConnectionAttempts, but applies on a per-client basis // (i.e. keyed by the remote Tailscale IP). func maxInFlightConnectionAttemptsPerClient() int { - if n := maxInFlightConnectionAttemptsPerClientForTest; n > 0 { - return n + if n := maxInFlightConnectionAttemptsPerClientForTest.Load(); n > 0 { + return int(n) } // For now, allow each individual client at most 2/3rds of the global diff --git a/wgengine/netstack/netstack_test.go b/wgengine/netstack/netstack_test.go index e588fa47c..fd44741bc 100644 --- a/wgengine/netstack/netstack_test.go +++ b/wgengine/netstack/netstack_test.go @@ -814,8 +814,10 @@ func TestTCPForwardLimits_PerClient(t *testing.T) { envknob.Setenv("TS_DEBUG_NETSTACK", "true") // Set our test override limits during this test. - tstest.Replace(t, &maxInFlightConnectionAttemptsForTest, 2) - tstest.Replace(t, &maxInFlightConnectionAttemptsPerClientForTest, 1) + maxInFlightConnectionAttemptsForTest.Store(2) + t.Cleanup(func() { maxInFlightConnectionAttemptsForTest.Store(0) }) + maxInFlightConnectionAttemptsPerClientForTest.Store(1) + t.Cleanup(func() { maxInFlightConnectionAttemptsPerClientForTest.Store(0) }) impl := makeNetstack(t, func(impl *Impl) { impl.ProcessSubnets = true