fix flaky tests: improve test isolation and reliability

Test fixes for consistent pass rates under repeated runs (-count=N):

Global state isolation:
- appc: check metric deltas instead of absolute values
- cmd/derper: initialize DNS caches before test
- tsweb/varz: use unpublished expvar to avoid global registration
- wgengine/netstack: check metric deltas instead of absolute values
- net/dns: use SetForTest() with deferred restore for hooks

Timeout and concurrency:
- cmd/containerboot: increase wait loop timeouts for parallel load
- tsconsensus: add deadline to waitFor(), disable netns once in TestMain
- tstest/integration: add tstest.Shard/Parallel, fix IPN bus watchers
- net/netcheck: set testCaptivePortalDelay to prevent hangs
- wgengine/magicsock: use Port:0, add timeout to callback wait
- drive/driveimpl: use http.Server for proper shutdown

Race conditions:
- tstest/archtest: remove !race from build constraint
- util/deephash: use local sink variable instead of package-level
- net/art: switch to math/rand/v2 for thread-safe globals
- tstest/integration: use Status() instead of MustStatus() from goroutines

Test optimization:
- net/udprelay: rewrite VNI test to avoid iterating all 16M values
- ipn/ipnlocal: reset env vars between subtests
- cmd/containerboot/serve: use SetWaitDurationForTest
- tsnet: wait for service VIP in AllowedIPs before dialing

Change-Id: Id6186fb8a45031920550a208ded77382e57cc016
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
This commit is contained in:
Avery Pennarun 2026-04-13 03:54:59 +02:00
parent ebbf47ca1a
commit 97e021d8ed
19 changed files with 297 additions and 104 deletions

View File

@ -736,12 +736,19 @@ func TestRateLogger(t *testing.T) {
}
func TestRouteStoreMetrics(t *testing.T) {
// Capture initial metric values before the test actions, since metrics
// are global counters that persist across test runs.
initial := make(map[string]int64)
for _, x := range clientmetric.Metrics() {
initial[x.Name()] = x.Value()
}
metricStoreRoutes(1, 1)
metricStoreRoutes(1, 1) // the 1 buckets value should be 2
metricStoreRoutes(5, 5) // the 5 buckets value should be 1
metricStoreRoutes(6, 6) // the 10 buckets value should be 1
metricStoreRoutes(10001, 10001) // the over buckets value should be 1
wanted := map[string]int64{
wantedDelta := map[string]int64{
"appc_store_routes_n_routes_1": 2,
"appc_store_routes_rate_1": 2,
"appc_store_routes_n_routes_5": 1,
@ -752,8 +759,9 @@ func TestRouteStoreMetrics(t *testing.T) {
"appc_store_routes_rate_over": 1,
}
for _, x := range clientmetric.Metrics() {
if x.Value() != wanted[x.Name()] {
t.Errorf("%s: want: %d, got: %d", x.Name(), wanted[x.Name()], x.Value())
delta := x.Value() - initial[x.Name()]
if delta != wantedDelta[x.Name()] {
t.Errorf("%s: want delta: %d, got delta: %d", x.Name(), wantedDelta[x.Name()], delta)
}
}
}

View File

@ -1324,9 +1324,13 @@ func TestContainerBoot(t *testing.T) {
}
wantCmds = append(wantCmds, p.WantCmds...)
waitArgs(t, 2*time.Second, env.d, env.argFile, strings.Join(wantCmds, "\n"))
// Use generous timeouts (10s) because tests run in parallel,
// causing resource contention. The polling interval is 100ms,
// so successful tests complete quickly; the timeout only
// affects failure cases.
waitArgs(t, 10*time.Second, env.d, env.argFile, strings.Join(wantCmds, "\n"))
err = tstest.WaitFor(2*time.Second, func() error {
err = tstest.WaitFor(10*time.Second, func() error {
for path, want := range p.WantFiles {
gotBs, err := os.ReadFile(filepath.Join(env.d, path))
if err != nil {
@ -1343,7 +1347,7 @@ func TestContainerBoot(t *testing.T) {
}
for url, want := range p.EndpointStatuses {
err := tstest.WaitFor(2*time.Second, func() error {
err := tstest.WaitFor(10*time.Second, func() error {
resp, err := http.Get(url)
if err != nil && want != -1 {
return fmt.Errorf("GET %s: %v", url, err)
@ -1361,7 +1365,7 @@ func TestContainerBoot(t *testing.T) {
}
}
}
waitLogLine(t, 2*time.Second, cbOut, "Startup complete, waiting for shutdown signal")
waitLogLine(t, 10*time.Second, cbOut, "Startup complete, waiting for shutdown signal")
if cmd.ProcessState != nil {
t.Fatalf("containerboot should be running but exited with exit code %d", cmd.ProcessState.ExitCode())
}

View File

@ -10,11 +10,13 @@ import (
"os"
"path/filepath"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"tailscale.com/ipn"
"tailscale.com/kube/kubetypes"
"tailscale.com/kube/localclient"
"tailscale.com/kube/services"
"tailscale.com/tailcfg"
)
@ -198,6 +200,10 @@ func TestReadServeConfig(t *testing.T) {
}
func TestRefreshAdvertiseServices(t *testing.T) {
// Use a very short wait duration for tests to avoid 20-second delays.
restore := services.SetWaitDurationForTest(1 * time.Millisecond)
t.Cleanup(restore)
tests := []struct {
name string
sc *ipn.ServeConfig

View File

@ -166,6 +166,13 @@ func TestUnpublishedDNSEmptyList(t *testing.T) {
}
func TestLookupMetric(t *testing.T) {
// Set up the DNS caches so handleBootstrapDNS returns valid JSON.
pub := &dnsEntryMap{
IPs: map[string][]net.IP{"tailscale.com": {net.IPv4(10, 10, 10, 10)}},
}
dnsCache.Store(pub)
dnsCacheBytes.Store([]byte(`{"tailscale.com":["10.10.10.10"]}`))
d := []string{"a.io", "b.io", "c.io", "d.io", "e.io", "e.io", "e.io", "a.io"}
resetMetrics()
for _, q := range d {

View File

@ -434,6 +434,7 @@ type local struct {
type remote struct {
l net.Listener
httpServer *http.Server
fs *FileSystemForRemote
fileServer *FileServer
shares map[string]string
@ -510,14 +511,15 @@ func (s *system) addRemote(name string) string {
s.t.Logf("FileServer for %v listening at %s", name, fileServer.Addr())
r := &remote{
l: ln,
fileServer: fileServer,
fs: NewFileSystemForRemote(log.Printf),
shares: make(map[string]string),
l: ln,
fileServer: fileServer,
fs: NewFileSystemForRemote(log.Printf),
shares: make(map[string]string),
permissions: make(map[string]drive.Permission),
}
r.fs.SetFileServerAddr(fileServer.Addr())
go http.Serve(ln, r)
r.httpServer = &http.Server{Handler: r}
go r.httpServer.Serve(ln)
s.remotes[name] = r
remotes := make([]*drive.Remote, 0, len(s.remotes))
@ -694,9 +696,9 @@ func (s *system) stop() {
s.t.Fatalf("failed to Close remote fs: %s", err)
}
err = r.l.Close()
err = r.httpServer.Close()
if err != nil {
s.t.Fatalf("failed to Close remote listener: %s", err)
s.t.Fatalf("failed to Close remote http server: %s", err)
}
err = r.fileServer.Close()

View File

@ -518,9 +518,13 @@ func TestGetCertPEMWithValidity(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Always reset the cert share mode at the start of each subtest to ensure
// test isolation. Without this, a previous subtest's "ro" setting would
// leak into subsequent subtests.
if tt.readOnlyMode {
envknob.Setenv("TS_CERT_SHARE_MODE", "ro")
} else {
envknob.Setenv("TS_CERT_SHARE_MODE", "")
}
os.RemoveAll(certDir)

View File

@ -6,7 +6,7 @@ package art
import (
crand "crypto/rand"
"fmt"
"math/rand"
"math/rand/v2"
"net/netip"
"runtime"
"strconv"
@ -1140,7 +1140,7 @@ func randomPrefixes4(n int) []slowPrefixEntry[int] {
pfxs := map[netip.Prefix]bool{}
for len(pfxs) < n {
len := rand.Intn(33)
len := rand.IntN(33)
pfx, err := randomAddr4().Prefix(len)
if err != nil {
panic(err)
@ -1161,7 +1161,7 @@ func randomPrefixes6(n int) []slowPrefixEntry[int] {
pfxs := map[netip.Prefix]bool{}
for len(pfxs) < n {
len := rand.Intn(129)
len := rand.IntN(129)
pfx, err := randomAddr6().Prefix(len)
if err != nil {
panic(err)
@ -1179,7 +1179,7 @@ func randomPrefixes6(n int) []slowPrefixEntry[int] {
// randomAddr returns a randomly generated IP address.
func randomAddr() netip.Addr {
if rand.Intn(2) == 1 {
if rand.IntN(2) == 1 {
return randomAddr6()
} else {
return randomAddr4()

View File

@ -21,7 +21,8 @@ import (
)
func TestDNSTrampleRecovery(t *testing.T) {
HookWatchFile.Set(watchFile)
restore := HookWatchFile.SetForTest(watchFile)
defer restore()
synctest.Test(t, func(t *testing.T) {
tmp := t.TempDir()
if err := os.MkdirAll(filepath.Join(tmp, "etc"), 0700); err != nil {

View File

@ -32,6 +32,10 @@ func newTestClient(t testing.TB) *Client {
TimeNow: func() time.Time {
return time.Unix(1729624521, 0)
},
// Disable captive portal detection in tests by setting a very long
// delay. The captivePortalStop() call before waiting ensures the
// channel is closed even if the timer hasn't fired.
testCaptivePortalDelay: time.Hour,
}
return c
}

View File

@ -94,6 +94,7 @@ type Server struct {
closed bool
lamportID uint64
nextVNI uint32
testVNIPoolSize uint32 // if non-zero, overrides totalPossibleVNI for testing
// serverEndpointByVNI is consistent with serverEndpointByDisco while mu is
// held, i.e. mu must be held around write ops. Read ops in performance
// sensitive paths, e.g. packet forwarding, do not need to acquire mu.
@ -977,7 +978,11 @@ func (e ErrServerNotReady) Error() string {
// For now, we favor simplicity and reducing VNI re-use over more complex
// ephemeral port (VNI) selection algorithms.
func (s *Server) getNextVNILocked() (uint32, error) {
for range totalPossibleVNI {
poolSize := totalPossibleVNI
if s.testVNIPoolSize > 0 {
poolSize = s.testVNIPoolSize
}
for range poolSize {
vni := s.nextVNI
if vni == maxVNI {
s.nextVNI = minVNI

View File

@ -342,22 +342,95 @@ func TestServer(t *testing.T) {
func TestServer_getNextVNILocked(t *testing.T) {
t.Parallel()
c := qt.New(t)
// Test 1: Basic sequential allocation from minVNI
s := &Server{
nextVNI: minVNI,
}
for range uint64(totalPossibleVNI) {
vni, err := s.getNextVNILocked()
if err != nil { // using quicktest here triples test time
t.Fatal(err)
}
s.serverEndpointByVNI.Store(vni, nil)
}
c.Assert(s.nextVNI, qt.Equals, minVNI)
_, err := s.getNextVNILocked()
c.Assert(err, qt.IsNotNil)
s.serverEndpointByVNI.Delete(minVNI)
_, err = s.getNextVNILocked()
vni, err := s.getNextVNILocked()
c.Assert(err, qt.IsNil)
c.Assert(vni, qt.Equals, minVNI)
c.Assert(s.nextVNI, qt.Equals, minVNI+1)
// Test 2: Wrapping from maxVNI back to minVNI
s = &Server{
nextVNI: maxVNI,
}
vni, err = s.getNextVNILocked()
c.Assert(err, qt.IsNil)
c.Assert(vni, qt.Equals, maxVNI)
c.Assert(s.nextVNI, qt.Equals, minVNI) // Should wrap to minVNI
// Test 3: Skips occupied VNIs
s = &Server{
nextVNI: minVNI,
}
s.serverEndpointByVNI.Store(minVNI, nil)
s.serverEndpointByVNI.Store(minVNI+1, nil)
vni, err = s.getNextVNILocked()
c.Assert(err, qt.IsNil)
c.Assert(vni, qt.Equals, minVNI+2)
// Test 4: Wrapping while skipping occupied VNIs
s = &Server{
nextVNI: maxVNI - 1,
}
s.serverEndpointByVNI.Store(maxVNI-1, nil)
s.serverEndpointByVNI.Store(maxVNI, nil)
vni, err = s.getNextVNILocked()
c.Assert(err, qt.IsNil)
c.Assert(vni, qt.Equals, minVNI)
c.Assert(s.nextVNI, qt.Equals, minVNI+1)
// Test 5: VNI freeing and re-allocation
// Verify that when VNIs are freed, new allocations continue from nextVNI
// (the freed VNI won't be returned until nextVNI wraps back around to it).
s = &Server{
nextVNI: minVNI,
}
s.serverEndpointByVNI.Store(minVNI, nil)
s.serverEndpointByVNI.Store(minVNI+1, nil)
s.serverEndpointByVNI.Store(minVNI+2, nil)
vni, err = s.getNextVNILocked()
c.Assert(err, qt.IsNil)
c.Assert(vni, qt.Equals, minVNI+3)
// Delete minVNI+1 and verify we can get VNIs (even though minVNI+1 won't
// be returned next since nextVNI has advanced past it)
s.serverEndpointByVNI.Delete(minVNI + 1)
vni, err = s.getNextVNILocked()
c.Assert(err, qt.IsNil)
c.Assert(vni, qt.Equals, minVNI+4) // nextVNI is now at minVNI+4
// Test 6: Pool exhaustion with testVNIPoolSize
// Use a small pool size to test the exhaustion error path without
// allocating 16M entries. testVNIPoolSize limits how many VNIs the
// algorithm checks before giving up, simulating a full pool.
//
// We occupy more VNIs than testVNIPoolSize to ensure that no matter
// where nextVNI starts, the search will only find occupied VNIs.
const testPoolSize = 100
s = &Server{
nextVNI: minVNI,
testVNIPoolSize: testPoolSize,
}
// Occupy VNIs [minVNI, minVNI + testPoolSize*2) to ensure the search
// window is fully covered regardless of where nextVNI advances to
for i := uint32(0); i < testPoolSize*2; i++ {
s.serverEndpointByVNI.Store(minVNI+i, nil)
}
// Search starts at minVNI, checks testPoolSize VNIs, all are occupied
_, err = s.getNextVNILocked()
c.Assert(err, qt.IsNotNil)
c.Assert(err.Error(), qt.Equals, "VNI pool exhausted")
// Test 7: Recovery after freeing a VNI in the search window
// nextVNI is now at minVNI+testPoolSize (after the failed search advanced it)
// Free a VNI that will be found in the next search
s.serverEndpointByVNI.Delete(minVNI + testPoolSize + 10)
vni, err = s.getNextVNILocked()
c.Assert(err, qt.IsNil)
c.Assert(vni, qt.Equals, minVNI+testPoolSize+10)
}
func Test_blakeMACFromBindMsg(t *testing.T) {

View File

@ -40,6 +40,24 @@ import (
"tailscale.com/util/racebuild"
)
func TestMain(m *testing.M) {
// tailscale/corp#4520: don't use netns for tests.
//
// When netns is enabled, it sets socket options (SO_MARK on Linux,
// IP_BOUND_IF on macOS) to bypass Tailscale routing and reach the physical
// network directly. These tests use fake localhost networks with tsnet.Server
// and testcontrol.Server. With netns enabled:
// - Socket options may fail (non-root can't set SO_MARK on Linux)
// - Traffic may route to real interfaces instead of test localhost servers
//
// We disable netns once in TestMain rather than per-test because the previous
// pattern of disabling in each test and restoring in t.Cleanup caused races:
// when tests run in parallel, Test A's cleanup would restore netns while
// Test B was still running with netns disabled.
netns.SetEnabled(false)
os.Exit(m.Run())
}
type fsm struct {
mu sync.Mutex
applyEvents []string
@ -125,11 +143,7 @@ func testConfig(t *testing.T) {
func startControl(t testing.TB) (control *testcontrol.Server, controlURL string) {
t.Helper()
// tailscale/corp#4520: don't use netns for tests.
netns.SetEnabled(false)
t.Cleanup(func() {
netns.SetEnabled(true)
})
// netns is disabled for this package in TestMain.
derpLogf := logger.Discard
derpMap := integration.RunDERPAndSTUN(t, derpLogf, "127.0.0.1")
@ -269,8 +283,10 @@ func TestStart(t *testing.T) {
func waitFor(t testing.TB, msg string, condition func() bool, waitBetweenTries time.Duration) {
t.Helper()
// Use a deadline to prevent infinite waiting under load
deadline := time.Now().Add(2 * time.Minute)
try := 0
for true {
for time.Now().Before(deadline) {
try++
done := condition()
if done {
@ -279,6 +295,7 @@ func waitFor(t testing.TB, msg string, condition func() bool, waitBetweenTries t
}
time.Sleep(waitBetweenTries)
}
t.Fatalf("waitFor timeout: %s: failed after %d tries over 2 minutes", msg, try)
}
type participant struct {

View File

@ -1012,21 +1012,51 @@ func setUpServiceState(t *testing.T, name, ip string, host, client *Server,
})
// Wait until both nodes have up-to-date netmaps before
// proceeding with the test.
netmapUpToDate := func(nm *netmap.NetworkMap) bool {
// proceeding with the test. We need to check both:
// 1. DNS records contain the service IP (sentinel for netmap propagation)
// 2. For the client: the service host peer has the service VIP in AllowedIPs
// (required for routing to work)
servicePrefix := netip.MustParsePrefix(ip + "/32")
hasDNSRecord := func(nm *netmap.NetworkMap) bool {
return nm != nil && slices.ContainsFunc(nm.DNS.ExtraRecords, func(r tailcfg.DNSRecord) bool {
return r.Value == ip
})
}
waitForLatestNetmap := func(t *testing.T, s *Server) {
hasRouteToService := func(nm *netmap.NetworkMap, hostNodeKey key.NodePublic) bool {
if nm == nil {
return false
}
for _, peer := range nm.Peers {
if peer.Key() == hostNodeKey {
for _, aip := range peer.AllowedIPs().All() {
if aip == servicePrefix {
return true
}
}
}
}
return false
}
waitForLatestNetmap := func(t *testing.T, s *Server, checkRoute bool, hostNodeKey key.NodePublic) {
t.Helper()
w := must.Get(s.localClient.WatchIPNBus(t.Context(), ipn.NotifyInitialNetMap))
defer w.Close()
for n := must.Get(w.Next()); !netmapUpToDate(n.NetMap); n = must.Get(w.Next()) {
for {
n := must.Get(w.Next())
if !hasDNSRecord(n.NetMap) {
continue
}
if checkRoute && !hasRouteToService(n.NetMap, hostNodeKey) {
continue
}
break
}
}
waitForLatestNetmap(t, client)
waitForLatestNetmap(t, host)
hostNodeKey := host.lb.NodeKey()
// Client needs both DNS and route to service host
waitForLatestNetmap(t, client, true, hostNodeKey)
// Host only needs DNS (it's the one hosting the service)
waitForLatestNetmap(t, host, false, key.NodePublic{})
}
func TestListenService(t *testing.T) {

View File

@ -1,7 +1,12 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
//go:build linux && amd64 && !race
// This file previously had "!race" in its build constraint, which was a CI
// optimization (qemu binaries aren't installed on the race builder to save
// time). The constraint was removed because it's not a technical requirement:
// the test gracefully skips architectures when qemu-{arch} isn't available.
//go:build linux && amd64
package archtest

View File

@ -34,7 +34,6 @@ import (
"github.com/miekg/dns"
"go4.org/mem"
"tailscale.com/client/local"
"tailscale.com/client/tailscale"
"tailscale.com/cmd/testwrapper/flakytest"
"tailscale.com/feature"
_ "tailscale.com/feature/clientupdate"
@ -1194,40 +1193,46 @@ func TestClientSideJailing(t *testing.T) {
}
}
b1, err := lc1.WatchIPNBus(context.Background(), 0)
if err != nil {
t.Fatal(err)
}
b2, err := lc2.WatchIPNBus(context.Background(), 0)
if err != nil {
t.Fatal(err)
}
waitPeerIsJailed := func(t *testing.T, b *tailscale.IPNBusWatcher, jailed bool) {
t.Helper()
for {
n, err := b.Next()
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
// Start watchers BEFORE calling SetJailed to ensure we don't miss the update.
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
b1, err := lc1.WatchIPNBus(ctx, 0)
if err != nil {
t.Fatal(err)
}
if n.NetMap == nil {
continue
defer b1.Close()
b2, err := lc2.WatchIPNBus(ctx, 0)
if err != nil {
t.Fatal(err)
}
if len(n.NetMap.Peers) == 0 {
continue
}
if j := n.NetMap.Peers[0].IsJailed(); j == jailed {
break
}
}
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
defer b2.Close()
env.Control.SetJailed(k1, k2, tc.n2JailedForN1)
env.Control.SetJailed(k2, k1, tc.n1JailedForN2)
// Wait for the jailed status to propagate.
waitPeerIsJailed(t, b1, tc.n2JailedForN1)
waitPeerIsJailed(t, b2, tc.n1JailedForN2)
waitPeerIsJailed := func(b *local.IPNBusWatcher, jailed bool) {
t.Helper()
for {
n, err := b.Next()
if err != nil {
t.Fatalf("waiting for peer jailed=%v: %v", jailed, err)
}
if n.NetMap == nil {
continue
}
if len(n.NetMap.Peers) == 0 {
continue
}
if j := n.NetMap.Peers[0].IsJailed(); j == jailed {
return
}
}
}
waitPeerIsJailed(b1, tc.n2JailedForN1)
waitPeerIsJailed(b2, tc.n1JailedForN2)
testDial(t, lc1, ip2, port, tc.n1JailedForN2)
testDial(t, lc2, ip1, port, tc.n2JailedForN1)
@ -2050,18 +2055,28 @@ func TestPeerRelayPing(t *testing.T) {
a := pair[0]
z := pair[1]
err := tstest.WaitFor(time.Second*10, func() error {
remoteKey := z.MustStatus().Self.PublicKey
// Use Status() instead of MustStatus() to avoid calling t.Fatal
// from a goroutine, which can cause a race with test cleanup.
zSt, err := z.Status()
if err != nil {
return fmt.Errorf("z.Status: %w", err)
}
remoteKey := zSt.Self.PublicKey
if err := a.Tailscale("ping", "--until-direct=false", "--c=1", "--timeout=1s", z.AwaitIP4().String()).Run(); err != nil {
return err
}
remotePeer, ok := a.MustStatus().Peer[remoteKey]
aSt, err := a.Status()
if err != nil {
return fmt.Errorf("a.Status: %w", err)
}
remotePeer, ok := aSt.Peer[remoteKey]
if !ok {
return fmt.Errorf("%v->%v remote peer not found", a.MustStatus().Self.ID, z.MustStatus().Self.ID)
return fmt.Errorf("%v->%v remote peer not found", aSt.Self.ID, zSt.Self.ID)
}
if len(remotePeer.PeerRelay) == 0 {
return fmt.Errorf("%v->%v not using peer relay, curAddr=%v relay=%v", a.MustStatus().Self.ID, z.MustStatus().Self.ID, remotePeer.CurAddr, remotePeer.Relay)
return fmt.Errorf("%v->%v not using peer relay, curAddr=%v relay=%v", aSt.Self.ID, zSt.Self.ID, remotePeer.CurAddr, remotePeer.Relay)
}
t.Logf("%v->%v using peer relay addr: %v", a.MustStatus().Self.ID, z.MustStatus().Self.ID, remotePeer.PeerRelay)
t.Logf("%v->%v using peer relay addr: %v", aSt.Self.ID, zSt.Self.ID, remotePeer.PeerRelay)
return nil
})
errCh <- err
@ -2256,6 +2271,8 @@ func TestC2NDebugNetmap(t *testing.T) {
}
func TestTailnetLock(t *testing.T) {
tstest.Shard(t)
tstest.Parallel(t)
// If you run `tailscale lock log` on a node where Tailnet Lock isn't
// enabled, you get an error explaining that.

View File

@ -205,7 +205,9 @@ func TestVarzHandler(t *testing.T) {
"string_map",
func() *expvar.Map {
m := new(expvar.Map)
m.Set("a", expvar.NewString("foo"))
s := new(expvar.String)
s.Set("foo")
m.Set("a", s)
return m
}(),
"# skipping \"string_map\" expvar map key \"a\" with unknown value type *expvar.String\n",

View File

@ -26,7 +26,6 @@ import (
"tailscale.com/types/key"
"tailscale.com/util/deephash/testtype"
"tailscale.com/util/hashx"
"tailscale.com/version"
)
type appendBytes []byte
@ -777,10 +776,6 @@ func TestMapCyclicFallback(t *testing.T) {
}
func TestArrayAllocs(t *testing.T) {
if version.IsRace() {
t.Skip("skipping test under race detector")
}
// In theory, there should be no allocations. However, escape analysis on
// certain architectures fails to detect that certain cases do not escape.
// This discrepancy currently affects sha256.digest.Sum.
@ -801,9 +796,11 @@ func TestArrayAllocs(t *testing.T) {
X [32]byte
}
x := &T{X: [32]byte{1: 1, 2: 2, 3: 3, 4: 4}}
var localSink Sum
got := int(testing.AllocsPerRun(1000, func() {
sink = Hash(x)
localSink = Hash(x)
}))
_ = localSink // prevent compiler from optimizing away the Hash call
if got > want {
t.Errorf("allocs = %v; want %v", got, want)
}

View File

@ -414,9 +414,11 @@ func TestNewConn(t *testing.T) {
stunAddr, stunCleanupFn := stuntest.Serve(t)
defer stunCleanupFn()
port := pickPort(t)
// Use port 0 to let the system assign a port, avoiding TOCTOU races
// from the previous pickPort approach which would close a socket and
// hope to rebind to the same port.
conn, err := NewConn(Options{
Port: port,
Port: 0,
DisablePortMapper: true,
EndpointsFunc: epFunc,
Logf: t.Logf,
@ -428,6 +430,13 @@ func TestNewConn(t *testing.T) {
t.Fatal(err)
}
defer conn.Close()
// Get the actual port that was assigned
port := conn.LocalPort()
if port == 0 {
t.Fatal("LocalPort returned 0")
}
conn.SetDERPMap(stuntest.DERPMapOf(stunAddr.String()))
conn.SetPrivateKey(key.NewNode())
@ -463,16 +472,6 @@ collectEndpoints:
}
}
func pickPort(t testing.TB) uint16 {
t.Helper()
conn, err := net.ListenPacket("udp4", "127.0.0.1:0")
if err != nil {
t.Fatal(err)
}
defer conn.Close()
return uint16(conn.LocalAddr().(*net.UDPAddr).Port)
}
func TestPickDERPFallback(t *testing.T) {
tstest.PanicOnLog()
tstest.ResourceCheck(t)
@ -557,6 +556,8 @@ func TestDERPActiveFuncCalledAfterConnect(t *testing.T) {
EndpointsFunc: func([]tailcfg.Endpoint) {},
DERPActiveFunc: func() {
// derpStarted should already be closed when DERPActiveFunc is called.
// The goroutine calling this waits on startGate (= derpStarted for
// firstDerp), so by the time we get here, derpStarted must be closed.
select {
case <-conn.derpStarted:
resultCh <- true
@ -575,8 +576,14 @@ func TestDERPActiveFuncCalledAfterConnect(t *testing.T) {
t.Fatal(err)
}
if ok := <-resultCh; !ok {
t.Error("DERPActiveFunc was called before DERP connection was established")
// Add a timeout to avoid hanging forever if DERPActiveFunc is never called.
select {
case ok := <-resultCh:
if !ok {
t.Error("DERPActiveFunc was called before DERP connection was established")
}
case <-time.After(10 * time.Second):
t.Fatal("timeout waiting for DERPActiveFunc to be called")
}
}
@ -1435,7 +1442,6 @@ func Test32bitAlignment(t *testing.T) {
// newTestConn returns a new Conn.
func newTestConn(t testing.TB) *Conn {
t.Helper()
port := pickPort(t)
bus := eventbustest.NewBus(t)
@ -1445,6 +1451,7 @@ func newTestConn(t testing.TB) *Conn {
}
t.Cleanup(func() { netMon.Close() })
// Use port 0 to let the system assign a port, avoiding TOCTOU races.
conn, err := NewConn(Options{
NetMon: netMon,
EventBus: bus,
@ -1452,7 +1459,7 @@ func newTestConn(t testing.TB) *Conn {
Metrics: new(usermetric.Registry),
DisablePortMapper: true,
Logf: t.Logf,
Port: port,
Port: 0,
TestOnlyPacketListener: localhostListener{},
EndpointsFunc: func(eps []tailcfg.Endpoint) {
t.Logf("endpoints: %q", eps)

View File

@ -870,6 +870,10 @@ func TestTCPForwardLimits_PerClient(t *testing.T) {
}
}
// Capture the initial value of the clientmetric before injecting packets,
// since it's a global counter that persists across test runs.
initialClientMetric := metricPerClientForwardLimit.Value()
// Inject the packet to start the TCP forward and wait until we have an
// in-flight outgoing connection.
mustInjectPacket()
@ -901,9 +905,9 @@ func TestTCPForwardLimits_PerClient(t *testing.T) {
t.Errorf("got expvar metric %q=%s, want 1", metricName, v)
}
// client metric
if v := metricPerClientForwardLimit.Value(); v != 1 {
t.Errorf("got clientmetric limit metric=%d, want 1", v)
// client metric - check it increased by 1 from initial value
if v := metricPerClientForwardLimit.Value(); v != initialClientMetric+1 {
t.Errorf("got clientmetric limit metric=%d, want %d", v, initialClientMetric+1)
}
}