wgengine/netstack: fix data race on in-flight connection test globals

The maxInFlightConnectionAttemptsForTest and
maxInFlightConnectionAttemptsPerClientForTest globals were plain ints
read by background gVisor TCP handler goroutines (via
wrapTCPProtocolHandler) and written by tstest.Replace cleanup in
TestTCPForwardLimits_PerClient. When a gVisor goroutine outlived the
test cleanup window, the race detector caught the unsynchronized
access.

The race-prone code was introduced in c5abbcd4b4d8 (2024-02-26,
"wgengine/netstack: add a per-client limit for in-flight TCP
forwards") which added both the plain int globals and the
TestTCPForwardLimits_PerClient test that writes them via
tstest.Replace. It is not obvious why this has only recently started
being detected as a data race; likely some combination of gVisor
version bumps, Go toolchain scheduler changes, and additional
TCP-injecting subtests (e.g. 03461ea7f, 2026-01-30) increased
goroutine churn enough to hit the window.

Change both globals to atomic.Int32 and replace tstest.Replace (whose
cleanup performs a plain, non-atomic `*target = old` restore) with
explicit Store calls paired with t.Cleanup functions that atomically
reset the values to zero.

Fixes #19118

Change-Id: Id26ba6fbfb2e4ade319976db80af8e16c7c8778e
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2026-04-13 20:31:35 +00:00 committed by Brad Fitzpatrick
parent 6500d3c3f8
commit 50b8cfbde2
2 changed files with 13 additions and 9 deletions

View File

@ -64,10 +64,12 @@ import (
const debugPackets = false
// If non-zero, these override the values returned from the corresponding
// functions, below.
// functions, below. They are accessed atomically because background
// goroutines in the gVisor TCP stack read them while test cleanup
// goroutines may be restoring them concurrently.
var (
maxInFlightConnectionAttemptsForTest int
maxInFlightConnectionAttemptsPerClientForTest int
maxInFlightConnectionAttemptsForTest atomic.Int32
maxInFlightConnectionAttemptsPerClientForTest atomic.Int32
)
// maxInFlightConnectionAttempts returns the global number of in-flight
@ -80,8 +82,8 @@ var (
// connection, so we want to ensure that we don't allow an unbounded number of
// connections.
func maxInFlightConnectionAttempts() int {
if n := maxInFlightConnectionAttemptsForTest; n > 0 {
return n
if n := maxInFlightConnectionAttemptsForTest.Load(); n > 0 {
return int(n)
}
if version.IsMobile() {
@ -106,8 +108,8 @@ func maxInFlightConnectionAttempts() int {
// maxInFlightConnectionAttempts, but applies on a per-client basis
// (i.e. keyed by the remote Tailscale IP).
func maxInFlightConnectionAttemptsPerClient() int {
if n := maxInFlightConnectionAttemptsPerClientForTest; n > 0 {
return n
if n := maxInFlightConnectionAttemptsPerClientForTest.Load(); n > 0 {
return int(n)
}
// For now, allow each individual client at most 2/3rds of the global

View File

@ -814,8 +814,10 @@ func TestTCPForwardLimits_PerClient(t *testing.T) {
envknob.Setenv("TS_DEBUG_NETSTACK", "true")
// Set our test override limits during this test.
tstest.Replace(t, &maxInFlightConnectionAttemptsForTest, 2)
tstest.Replace(t, &maxInFlightConnectionAttemptsPerClientForTest, 1)
maxInFlightConnectionAttemptsForTest.Store(2)
t.Cleanup(func() { maxInFlightConnectionAttemptsForTest.Store(0) })
maxInFlightConnectionAttemptsPerClientForTest.Store(1)
t.Cleanup(func() { maxInFlightConnectionAttemptsPerClientForTest.Store(0) })
impl := makeNetstack(t, func(impl *Impl) {
impl.ProcessSubnets = true