mirror of https://github.com/tailscale/tailscale.git, synced 2026-05-05 04:06:35 +02:00
fix flaky tests: improve test isolation and reliability
Test fixes for consistent pass rates under repeated runs (-count=N):

Global state isolation:
- appc: check metric deltas instead of absolute values
- cmd/derper: initialize DNS caches before test
- tsweb/varz: use unpublished expvar to avoid global registration
- wgengine/netstack: check metric deltas instead of absolute values
- net/dns: use SetForTest() with deferred restore for hooks

Timeout and concurrency:
- cmd/containerboot: increase wait loop timeouts for parallel load
- tsconsensus: add deadline to waitFor(), use sync.Once for netns
- tstest/integration: add tstest.Shard/Parallel, fix IPN bus watchers
- net/netcheck: set testCaptivePortalDelay to prevent hangs
- wgengine/magicsock: use Port:0, add timeout to callback wait
- drive/driveimpl: use http.Server for proper shutdown

Race conditions:
- tstest/archtest: remove !race from build constraint
- util/deephash: use local sink variable instead of package-level
- net/art: switch to math/rand/v2 for thread-safe globals
- tstest/integration: use Status() instead of MustStatus() from goroutines

Test optimization:
- net/udprelay: rewrite VNI test to avoid iterating all 16M values
- ipn/ipnlocal: reset env vars between subtests
- cmd/containerboot/serve: use SetWaitDurationForTest
- tsnet: wait for service VIP in AllowedIPs before dialing

Change-Id: Id6186fb8a45031920550a208ded77382e57cc016
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
parent ebbf47ca1a
commit 97e021d8ed
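Several of the fixes below share one pattern: instead of asserting a global clientmetric's absolute value (which drifts across -count=N runs), snapshot it first and assert on the delta. A minimal sketch of that idea, using a hypothetical counter and test name that are not part of this commit:

package example

import (
	"testing"

	"tailscale.com/util/clientmetric"
)

// metricDemo stands in for a real global counter such as the
// appc_store_routes_* metrics; clientmetric counters live for the whole
// process, so repeated runs of the same test keep incrementing them.
var metricDemo = clientmetric.NewCounter("example_demo_total")

func TestDemoDelta(t *testing.T) {
	initial := metricDemo.Value() // snapshot before the action under test

	metricDemo.Add(1) // stand-in for the code path being exercised

	if delta := metricDemo.Value() - initial; delta != 1 {
		t.Errorf("example_demo_total: got delta %d, want 1", delta)
	}
}

The appc and wgengine/netstack hunks below apply this same snapshot-then-delta check to their existing counters.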
@@ -736,12 +736,19 @@ func TestRateLogger(t *testing.T) {
 }

 func TestRouteStoreMetrics(t *testing.T) {
+	// Capture initial metric values before the test actions, since metrics
+	// are global counters that persist across test runs.
+	initial := make(map[string]int64)
+	for _, x := range clientmetric.Metrics() {
+		initial[x.Name()] = x.Value()
+	}
+
 	metricStoreRoutes(1, 1)
 	metricStoreRoutes(1, 1) // the 1 buckets value should be 2
 	metricStoreRoutes(5, 5) // the 5 buckets value should be 1
 	metricStoreRoutes(6, 6) // the 10 buckets value should be 1
 	metricStoreRoutes(10001, 10001) // the over buckets value should be 1
-	wanted := map[string]int64{
+	wantedDelta := map[string]int64{
 		"appc_store_routes_n_routes_1": 2,
 		"appc_store_routes_rate_1": 2,
 		"appc_store_routes_n_routes_5": 1,
@@ -752,8 +759,9 @@ func TestRouteStoreMetrics(t *testing.T) {
 		"appc_store_routes_rate_over": 1,
 	}
 	for _, x := range clientmetric.Metrics() {
-		if x.Value() != wanted[x.Name()] {
-			t.Errorf("%s: want: %d, got: %d", x.Name(), wanted[x.Name()], x.Value())
+		delta := x.Value() - initial[x.Name()]
+		if delta != wantedDelta[x.Name()] {
+			t.Errorf("%s: want delta: %d, got delta: %d", x.Name(), wantedDelta[x.Name()], delta)
 		}
 	}
 }
@@ -1324,9 +1324,13 @@ func TestContainerBoot(t *testing.T) {
 			}

 			wantCmds = append(wantCmds, p.WantCmds...)
-			waitArgs(t, 2*time.Second, env.d, env.argFile, strings.Join(wantCmds, "\n"))
+			// Use generous timeouts (10s) because tests run in parallel,
+			// causing resource contention. The polling interval is 100ms,
+			// so successful tests complete quickly; the timeout only
+			// affects failure cases.
+			waitArgs(t, 10*time.Second, env.d, env.argFile, strings.Join(wantCmds, "\n"))

-			err = tstest.WaitFor(2*time.Second, func() error {
+			err = tstest.WaitFor(10*time.Second, func() error {
 				for path, want := range p.WantFiles {
 					gotBs, err := os.ReadFile(filepath.Join(env.d, path))
 					if err != nil {
@@ -1343,7 +1347,7 @@ func TestContainerBoot(t *testing.T) {
 			}

 			for url, want := range p.EndpointStatuses {
-				err := tstest.WaitFor(2*time.Second, func() error {
+				err := tstest.WaitFor(10*time.Second, func() error {
 					resp, err := http.Get(url)
 					if err != nil && want != -1 {
 						return fmt.Errorf("GET %s: %v", url, err)
@@ -1361,7 +1365,7 @@ func TestContainerBoot(t *testing.T) {
 					}
 				}
 			}
-			waitLogLine(t, 2*time.Second, cbOut, "Startup complete, waiting for shutdown signal")
+			waitLogLine(t, 10*time.Second, cbOut, "Startup complete, waiting for shutdown signal")
 			if cmd.ProcessState != nil {
 				t.Fatalf("containerboot should be running but exited with exit code %d", cmd.ProcessState.ExitCode())
 			}
@@ -10,11 +10,13 @@ import (
 	"os"
 	"path/filepath"
 	"testing"
+	"time"

 	"github.com/google/go-cmp/cmp"
 	"tailscale.com/ipn"
 	"tailscale.com/kube/kubetypes"
 	"tailscale.com/kube/localclient"
+	"tailscale.com/kube/services"
 	"tailscale.com/tailcfg"
 )
@@ -198,6 +200,10 @@ func TestReadServeConfig(t *testing.T) {
 }

 func TestRefreshAdvertiseServices(t *testing.T) {
+	// Use a very short wait duration for tests to avoid 20-second delays.
+	restore := services.SetWaitDurationForTest(1 * time.Millisecond)
+	t.Cleanup(restore)
+
 	tests := []struct {
 		name string
 		sc   *ipn.ServeConfig
@@ -166,6 +166,13 @@ func TestUnpublishedDNSEmptyList(t *testing.T) {
 }

 func TestLookupMetric(t *testing.T) {
+	// Set up the DNS caches so handleBootstrapDNS returns valid JSON.
+	pub := &dnsEntryMap{
+		IPs: map[string][]net.IP{"tailscale.com": {net.IPv4(10, 10, 10, 10)}},
+	}
+	dnsCache.Store(pub)
+	dnsCacheBytes.Store([]byte(`{"tailscale.com":["10.10.10.10"]}`))
+
 	d := []string{"a.io", "b.io", "c.io", "d.io", "e.io", "e.io", "e.io", "a.io"}
 	resetMetrics()
 	for _, q := range d {
@@ -434,6 +434,7 @@ type local struct {

 type remote struct {
 	l          net.Listener
+	httpServer *http.Server
 	fs         *FileSystemForRemote
 	fileServer *FileServer
 	shares     map[string]string
@@ -510,14 +511,15 @@ func (s *system) addRemote(name string) string {
 	s.t.Logf("FileServer for %v listening at %s", name, fileServer.Addr())

 	r := &remote{
-		l:          ln,
-		fileServer: fileServer,
-		fs:         NewFileSystemForRemote(log.Printf),
-		shares:     make(map[string]string),
+		l:           ln,
+		fileServer:  fileServer,
+		fs:          NewFileSystemForRemote(log.Printf),
+		shares:      make(map[string]string),
 		permissions: make(map[string]drive.Permission),
 	}
 	r.fs.SetFileServerAddr(fileServer.Addr())
-	go http.Serve(ln, r)
+	r.httpServer = &http.Server{Handler: r}
+	go r.httpServer.Serve(ln)
 	s.remotes[name] = r

 	remotes := make([]*drive.Remote, 0, len(s.remotes))
@@ -694,9 +696,9 @@ func (s *system) stop() {
 			s.t.Fatalf("failed to Close remote fs: %s", err)
 		}

-		err = r.l.Close()
+		err = r.httpServer.Close()
 		if err != nil {
-			s.t.Fatalf("failed to Close remote listener: %s", err)
+			s.t.Fatalf("failed to Close remote http server: %s", err)
 		}

 		err = r.fileServer.Close()
@@ -518,9 +518,13 @@ func TestGetCertPEMWithValidity(t *testing.T) {
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
+			// Always reset the cert share mode at the start of each subtest to ensure
+			// test isolation. Without this, a previous subtest's "ro" setting would
+			// leak into subsequent subtests.
 			if tt.readOnlyMode {
 				envknob.Setenv("TS_CERT_SHARE_MODE", "ro")
+			} else {
+				envknob.Setenv("TS_CERT_SHARE_MODE", "")
 			}

 			os.RemoveAll(certDir)
@@ -6,7 +6,7 @@ package art
 import (
 	crand "crypto/rand"
 	"fmt"
-	"math/rand"
+	"math/rand/v2"
 	"net/netip"
 	"runtime"
 	"strconv"
@@ -1140,7 +1140,7 @@ func randomPrefixes4(n int) []slowPrefixEntry[int] {
 	pfxs := map[netip.Prefix]bool{}

 	for len(pfxs) < n {
-		len := rand.Intn(33)
+		len := rand.IntN(33)
 		pfx, err := randomAddr4().Prefix(len)
 		if err != nil {
 			panic(err)
@@ -1161,7 +1161,7 @@ func randomPrefixes6(n int) []slowPrefixEntry[int] {
 	pfxs := map[netip.Prefix]bool{}

 	for len(pfxs) < n {
-		len := rand.Intn(129)
+		len := rand.IntN(129)
 		pfx, err := randomAddr6().Prefix(len)
 		if err != nil {
 			panic(err)
@@ -1179,7 +1179,7 @@ func randomPrefixes6(n int) []slowPrefixEntry[int] {

 // randomAddr returns a randomly generated IP address.
 func randomAddr() netip.Addr {
-	if rand.Intn(2) == 1 {
+	if rand.IntN(2) == 1 {
 		return randomAddr6()
 	} else {
 		return randomAddr4()
@@ -21,7 +21,8 @@ import (
 )

 func TestDNSTrampleRecovery(t *testing.T) {
-	HookWatchFile.Set(watchFile)
+	restore := HookWatchFile.SetForTest(watchFile)
+	defer restore()
 	synctest.Test(t, func(t *testing.T) {
 		tmp := t.TempDir()
 		if err := os.MkdirAll(filepath.Join(tmp, "etc"), 0700); err != nil {
@@ -32,6 +32,10 @@ func newTestClient(t testing.TB) *Client {
 		TimeNow: func() time.Time {
 			return time.Unix(1729624521, 0)
 		},
+		// Disable captive portal detection in tests by setting a very long
+		// delay. The captivePortalStop() call before waiting ensures the
+		// channel is closed even if the timer hasn't fired.
+		testCaptivePortalDelay: time.Hour,
 	}
 	return c
 }
@@ -94,6 +94,7 @@ type Server struct {
 	closed    bool
 	lamportID uint64
 	nextVNI   uint32
+	testVNIPoolSize uint32 // if non-zero, overrides totalPossibleVNI for testing
 	// serverEndpointByVNI is consistent with serverEndpointByDisco while mu is
 	// held, i.e. mu must be held around write ops. Read ops in performance
 	// sensitive paths, e.g. packet forwarding, do not need to acquire mu.
@@ -977,7 +978,11 @@ func (e ErrServerNotReady) Error() string {
 // For now, we favor simplicity and reducing VNI re-use over more complex
 // ephemeral port (VNI) selection algorithms.
 func (s *Server) getNextVNILocked() (uint32, error) {
-	for range totalPossibleVNI {
+	poolSize := totalPossibleVNI
+	if s.testVNIPoolSize > 0 {
+		poolSize = s.testVNIPoolSize
+	}
+	for range poolSize {
 		vni := s.nextVNI
 		if vni == maxVNI {
 			s.nextVNI = minVNI
@@ -342,22 +342,95 @@ func TestServer(t *testing.T) {
 func TestServer_getNextVNILocked(t *testing.T) {
 	t.Parallel()
 	c := qt.New(t)
+
+	// Test 1: Basic sequential allocation from minVNI
 	s := &Server{
 		nextVNI: minVNI,
 	}
-	for range uint64(totalPossibleVNI) {
-		vni, err := s.getNextVNILocked()
-		if err != nil { // using quicktest here triples test time
-			t.Fatal(err)
-		}
-		s.serverEndpointByVNI.Store(vni, nil)
-	}
-	c.Assert(s.nextVNI, qt.Equals, minVNI)
-	_, err := s.getNextVNILocked()
-	c.Assert(err, qt.IsNotNil)
-	s.serverEndpointByVNI.Delete(minVNI)
-	_, err = s.getNextVNILocked()
+	vni, err := s.getNextVNILocked()
 	c.Assert(err, qt.IsNil)
+	c.Assert(vni, qt.Equals, minVNI)
+	c.Assert(s.nextVNI, qt.Equals, minVNI+1)
+
+	// Test 2: Wrapping from maxVNI back to minVNI
+	s = &Server{
+		nextVNI: maxVNI,
+	}
+	vni, err = s.getNextVNILocked()
+	c.Assert(err, qt.IsNil)
+	c.Assert(vni, qt.Equals, maxVNI)
+	c.Assert(s.nextVNI, qt.Equals, minVNI) // Should wrap to minVNI
+
+	// Test 3: Skips occupied VNIs
+	s = &Server{
+		nextVNI: minVNI,
+	}
+	s.serverEndpointByVNI.Store(minVNI, nil)
+	s.serverEndpointByVNI.Store(minVNI+1, nil)
+	vni, err = s.getNextVNILocked()
+	c.Assert(err, qt.IsNil)
+	c.Assert(vni, qt.Equals, minVNI+2)
+
+	// Test 4: Wrapping while skipping occupied VNIs
+	s = &Server{
+		nextVNI: maxVNI - 1,
+	}
+	s.serverEndpointByVNI.Store(maxVNI-1, nil)
+	s.serverEndpointByVNI.Store(maxVNI, nil)
+	vni, err = s.getNextVNILocked()
+	c.Assert(err, qt.IsNil)
+	c.Assert(vni, qt.Equals, minVNI)
+	c.Assert(s.nextVNI, qt.Equals, minVNI+1)
+
+	// Test 5: VNI freeing and re-allocation
+	// Verify that when VNIs are freed, new allocations continue from nextVNI
+	// (the freed VNI won't be returned until nextVNI wraps back around to it).
+	s = &Server{
+		nextVNI: minVNI,
+	}
+	s.serverEndpointByVNI.Store(minVNI, nil)
+	s.serverEndpointByVNI.Store(minVNI+1, nil)
+	s.serverEndpointByVNI.Store(minVNI+2, nil)
+	vni, err = s.getNextVNILocked()
+	c.Assert(err, qt.IsNil)
+	c.Assert(vni, qt.Equals, minVNI+3)
+
+	// Delete minVNI+1 and verify we can get VNIs (even though minVNI+1 won't
+	// be returned next since nextVNI has advanced past it)
+	s.serverEndpointByVNI.Delete(minVNI + 1)
+	vni, err = s.getNextVNILocked()
+	c.Assert(err, qt.IsNil)
+	c.Assert(vni, qt.Equals, minVNI+4) // nextVNI is now at minVNI+4
+
+	// Test 6: Pool exhaustion with testVNIPoolSize
+	// Use a small pool size to test the exhaustion error path without
+	// allocating 16M entries. testVNIPoolSize limits how many VNIs the
+	// algorithm checks before giving up, simulating a full pool.
+	//
+	// We occupy more VNIs than testVNIPoolSize to ensure that no matter
+	// where nextVNI starts, the search will only find occupied VNIs.
+	const testPoolSize = 100
+	s = &Server{
+		nextVNI:         minVNI,
+		testVNIPoolSize: testPoolSize,
+	}
+	// Occupy VNIs [minVNI, minVNI + testPoolSize*2) to ensure the search
+	// window is fully covered regardless of where nextVNI advances to
+	for i := uint32(0); i < testPoolSize*2; i++ {
+		s.serverEndpointByVNI.Store(minVNI+i, nil)
+	}
+	// Search starts at minVNI, checks testPoolSize VNIs, all are occupied
+	_, err = s.getNextVNILocked()
+	c.Assert(err, qt.IsNotNil)
+	c.Assert(err.Error(), qt.Equals, "VNI pool exhausted")
+
+	// Test 7: Recovery after freeing a VNI in the search window
+	// nextVNI is now at minVNI+testPoolSize (after the failed search advanced it)
+	// Free a VNI that will be found in the next search
+	s.serverEndpointByVNI.Delete(minVNI + testPoolSize + 10)
+	vni, err = s.getNextVNILocked()
+	c.Assert(err, qt.IsNil)
+	c.Assert(vni, qt.Equals, minVNI+testPoolSize+10)
 }

 func Test_blakeMACFromBindMsg(t *testing.T) {
@@ -40,6 +40,24 @@ import (
 	"tailscale.com/util/racebuild"
 )

+func TestMain(m *testing.M) {
+	// tailscale/corp#4520: don't use netns for tests.
+	//
+	// When netns is enabled, it sets socket options (SO_MARK on Linux,
+	// IP_BOUND_IF on macOS) to bypass Tailscale routing and reach the physical
+	// network directly. These tests use fake localhost networks with tsnet.Server
+	// and testcontrol.Server. With netns enabled:
+	//   - Socket options may fail (non-root can't set SO_MARK on Linux)
+	//   - Traffic may route to real interfaces instead of test localhost servers
+	//
+	// We disable netns once in TestMain rather than per-test because the previous
+	// pattern of disabling in each test and restoring in t.Cleanup caused races:
+	// when tests run in parallel, Test A's cleanup would restore netns while
+	// Test B was still running with netns disabled.
+	netns.SetEnabled(false)
+	os.Exit(m.Run())
+}
+
 type fsm struct {
 	mu          sync.Mutex
 	applyEvents []string
@@ -125,11 +143,7 @@ func testConfig(t *testing.T) {

 func startControl(t testing.TB) (control *testcontrol.Server, controlURL string) {
 	t.Helper()
-	// tailscale/corp#4520: don't use netns for tests.
-	netns.SetEnabled(false)
-	t.Cleanup(func() {
-		netns.SetEnabled(true)
-	})
+	// netns is disabled for this package in TestMain.

 	derpLogf := logger.Discard
 	derpMap := integration.RunDERPAndSTUN(t, derpLogf, "127.0.0.1")
@@ -269,8 +283,10 @@ func TestStart(t *testing.T) {

 func waitFor(t testing.TB, msg string, condition func() bool, waitBetweenTries time.Duration) {
 	t.Helper()
+	// Use a deadline to prevent infinite waiting under load
+	deadline := time.Now().Add(2 * time.Minute)
 	try := 0
-	for true {
+	for time.Now().Before(deadline) {
 		try++
 		done := condition()
 		if done {
@@ -279,6 +295,7 @@ func waitFor(t testing.TB, msg string, condition func() bool, waitBetweenTries t
 		}
 		time.Sleep(waitBetweenTries)
 	}
+	t.Fatalf("waitFor timeout: %s: failed after %d tries over 2 minutes", msg, try)
 }

 type participant struct {
@@ -1012,21 +1012,51 @@ func setUpServiceState(t *testing.T, name, ip string, host, client *Server,
 	})

 	// Wait until both nodes have up-to-date netmaps before
-	// proceeding with the test.
-	netmapUpToDate := func(nm *netmap.NetworkMap) bool {
+	// proceeding with the test. We need to check both:
+	// 1. DNS records contain the service IP (sentinel for netmap propagation)
+	// 2. For the client: the service host peer has the service VIP in AllowedIPs
+	// (required for routing to work)
+	servicePrefix := netip.MustParsePrefix(ip + "/32")
+	hasDNSRecord := func(nm *netmap.NetworkMap) bool {
 		return nm != nil && slices.ContainsFunc(nm.DNS.ExtraRecords, func(r tailcfg.DNSRecord) bool {
 			return r.Value == ip
 		})
 	}
-	waitForLatestNetmap := func(t *testing.T, s *Server) {
+	hasRouteToService := func(nm *netmap.NetworkMap, hostNodeKey key.NodePublic) bool {
+		if nm == nil {
+			return false
+		}
+		for _, peer := range nm.Peers {
+			if peer.Key() == hostNodeKey {
+				for _, aip := range peer.AllowedIPs().All() {
+					if aip == servicePrefix {
+						return true
+					}
+				}
+			}
+		}
+		return false
+	}
+	waitForLatestNetmap := func(t *testing.T, s *Server, checkRoute bool, hostNodeKey key.NodePublic) {
 		t.Helper()
 		w := must.Get(s.localClient.WatchIPNBus(t.Context(), ipn.NotifyInitialNetMap))
 		defer w.Close()
-		for n := must.Get(w.Next()); !netmapUpToDate(n.NetMap); n = must.Get(w.Next()) {
+		for {
+			n := must.Get(w.Next())
+			if !hasDNSRecord(n.NetMap) {
+				continue
+			}
+			if checkRoute && !hasRouteToService(n.NetMap, hostNodeKey) {
+				continue
+			}
+			break
 		}
 	}
-	waitForLatestNetmap(t, client)
-	waitForLatestNetmap(t, host)
+	hostNodeKey := host.lb.NodeKey()
+	// Client needs both DNS and route to service host
+	waitForLatestNetmap(t, client, true, hostNodeKey)
+	// Host only needs DNS (it's the one hosting the service)
+	waitForLatestNetmap(t, host, false, key.NodePublic{})
 }

 func TestListenService(t *testing.T) {
@@ -1,7 +1,12 @@
 // Copyright (c) Tailscale Inc & contributors
 // SPDX-License-Identifier: BSD-3-Clause

-//go:build linux && amd64 && !race
+// This file previously had "!race" in its build constraint, which was a CI
+// optimization (qemu binaries aren't installed on the race builder to save
+// time). The constraint was removed because it's not a technical requirement:
+// the test gracefully skips architectures when qemu-{arch} isn't available.
+
+//go:build linux && amd64

 package archtest

@@ -34,7 +34,6 @@ import (
 	"github.com/miekg/dns"
 	"go4.org/mem"
 	"tailscale.com/client/local"
-	"tailscale.com/client/tailscale"
 	"tailscale.com/cmd/testwrapper/flakytest"
 	"tailscale.com/feature"
 	_ "tailscale.com/feature/clientupdate"
@@ -1194,40 +1193,46 @@ func TestClientSideJailing(t *testing.T) {
 		}
 	}

-	b1, err := lc1.WatchIPNBus(context.Background(), 0)
-	if err != nil {
-		t.Fatal(err)
-	}
-	b2, err := lc2.WatchIPNBus(context.Background(), 0)
-	if err != nil {
-		t.Fatal(err)
-	}
-	waitPeerIsJailed := func(t *testing.T, b *tailscale.IPNBusWatcher, jailed bool) {
-		t.Helper()
-		for {
-			n, err := b.Next()
-			if err != nil {
-				t.Fatal(err)
-			}
-			if n.NetMap == nil {
-				continue
-			}
-			if len(n.NetMap.Peers) == 0 {
-				continue
-			}
-			if j := n.NetMap.Peers[0].IsJailed(); j == jailed {
-				break
-			}
-		}
-	}
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
+			// Start watchers BEFORE calling SetJailed to ensure we don't miss the update.
+			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+			defer cancel()
+			b1, err := lc1.WatchIPNBus(ctx, 0)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer b1.Close()
+			b2, err := lc2.WatchIPNBus(ctx, 0)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer b2.Close()
+
 			env.Control.SetJailed(k1, k2, tc.n2JailedForN1)
 			env.Control.SetJailed(k2, k1, tc.n1JailedForN2)

 			// Wait for the jailed status to propagate.
-			waitPeerIsJailed(t, b1, tc.n2JailedForN1)
-			waitPeerIsJailed(t, b2, tc.n1JailedForN2)
+			waitPeerIsJailed := func(b *local.IPNBusWatcher, jailed bool) {
+				t.Helper()
+				for {
+					n, err := b.Next()
+					if err != nil {
+						t.Fatalf("waiting for peer jailed=%v: %v", jailed, err)
+					}
+					if n.NetMap == nil {
+						continue
+					}
+					if len(n.NetMap.Peers) == 0 {
+						continue
+					}
+					if j := n.NetMap.Peers[0].IsJailed(); j == jailed {
+						return
+					}
+				}
+			}
+			waitPeerIsJailed(b1, tc.n2JailedForN1)
+			waitPeerIsJailed(b2, tc.n1JailedForN2)

 			testDial(t, lc1, ip2, port, tc.n1JailedForN2)
 			testDial(t, lc2, ip1, port, tc.n2JailedForN1)
@@ -2050,18 +2055,28 @@ func TestPeerRelayPing(t *testing.T) {
 			a := pair[0]
 			z := pair[1]
 			err := tstest.WaitFor(time.Second*10, func() error {
-				remoteKey := z.MustStatus().Self.PublicKey
+				// Use Status() instead of MustStatus() to avoid calling t.Fatal
+				// from a goroutine, which can cause a race with test cleanup.
+				zSt, err := z.Status()
+				if err != nil {
+					return fmt.Errorf("z.Status: %w", err)
+				}
+				remoteKey := zSt.Self.PublicKey
 				if err := a.Tailscale("ping", "--until-direct=false", "--c=1", "--timeout=1s", z.AwaitIP4().String()).Run(); err != nil {
 					return err
 				}
-				remotePeer, ok := a.MustStatus().Peer[remoteKey]
+				aSt, err := a.Status()
+				if err != nil {
+					return fmt.Errorf("a.Status: %w", err)
+				}
+				remotePeer, ok := aSt.Peer[remoteKey]
 				if !ok {
-					return fmt.Errorf("%v->%v remote peer not found", a.MustStatus().Self.ID, z.MustStatus().Self.ID)
+					return fmt.Errorf("%v->%v remote peer not found", aSt.Self.ID, zSt.Self.ID)
 				}
 				if len(remotePeer.PeerRelay) == 0 {
-					return fmt.Errorf("%v->%v not using peer relay, curAddr=%v relay=%v", a.MustStatus().Self.ID, z.MustStatus().Self.ID, remotePeer.CurAddr, remotePeer.Relay)
+					return fmt.Errorf("%v->%v not using peer relay, curAddr=%v relay=%v", aSt.Self.ID, zSt.Self.ID, remotePeer.CurAddr, remotePeer.Relay)
 				}
-				t.Logf("%v->%v using peer relay addr: %v", a.MustStatus().Self.ID, z.MustStatus().Self.ID, remotePeer.PeerRelay)
+				t.Logf("%v->%v using peer relay addr: %v", aSt.Self.ID, zSt.Self.ID, remotePeer.PeerRelay)
 				return nil
 			})
 			errCh <- err
@@ -2256,6 +2271,8 @@ func TestC2NDebugNetmap(t *testing.T) {
 }

 func TestTailnetLock(t *testing.T) {
+	tstest.Shard(t)
+	tstest.Parallel(t)

 	// If you run `tailscale lock log` on a node where Tailnet Lock isn't
 	// enabled, you get an error explaining that.
@@ -205,7 +205,9 @@ func TestVarzHandler(t *testing.T) {
 			"string_map",
 			func() *expvar.Map {
 				m := new(expvar.Map)
-				m.Set("a", expvar.NewString("foo"))
+				s := new(expvar.String)
+				s.Set("foo")
+				m.Set("a", s)
 				return m
 			}(),
 			"# skipping \"string_map\" expvar map key \"a\" with unknown value type *expvar.String\n",
@@ -26,7 +26,6 @@ import (
 	"tailscale.com/types/key"
 	"tailscale.com/util/deephash/testtype"
 	"tailscale.com/util/hashx"
-	"tailscale.com/version"
 )

 type appendBytes []byte
@@ -777,10 +776,6 @@ func TestMapCyclicFallback(t *testing.T) {
 }

 func TestArrayAllocs(t *testing.T) {
-	if version.IsRace() {
-		t.Skip("skipping test under race detector")
-	}
-
 	// In theory, there should be no allocations. However, escape analysis on
 	// certain architectures fails to detect that certain cases do not escape.
 	// This discrepancy currently affects sha256.digest.Sum.
@@ -801,9 +796,11 @@ func TestArrayAllocs(t *testing.T) {
 		X [32]byte
 	}
 	x := &T{X: [32]byte{1: 1, 2: 2, 3: 3, 4: 4}}
+	var localSink Sum
 	got := int(testing.AllocsPerRun(1000, func() {
-		sink = Hash(x)
+		localSink = Hash(x)
 	}))
+	_ = localSink // prevent compiler from optimizing away the Hash call
 	if got > want {
 		t.Errorf("allocs = %v; want %v", got, want)
 	}
@@ -414,9 +414,11 @@ func TestNewConn(t *testing.T) {
 	stunAddr, stunCleanupFn := stuntest.Serve(t)
 	defer stunCleanupFn()

-	port := pickPort(t)
+	// Use port 0 to let the system assign a port, avoiding TOCTOU races
+	// from the previous pickPort approach which would close a socket and
+	// hope to rebind to the same port.
 	conn, err := NewConn(Options{
-		Port:              port,
+		Port:              0,
 		DisablePortMapper: true,
 		EndpointsFunc:     epFunc,
 		Logf:              t.Logf,
@@ -428,6 +430,13 @@ func TestNewConn(t *testing.T) {
 		t.Fatal(err)
 	}
 	defer conn.Close()

+	// Get the actual port that was assigned
+	port := conn.LocalPort()
+	if port == 0 {
+		t.Fatal("LocalPort returned 0")
+	}
+
 	conn.SetDERPMap(stuntest.DERPMapOf(stunAddr.String()))
 	conn.SetPrivateKey(key.NewNode())
@@ -463,16 +472,6 @@ collectEndpoints:
 	}
 }

-func pickPort(t testing.TB) uint16 {
-	t.Helper()
-	conn, err := net.ListenPacket("udp4", "127.0.0.1:0")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer conn.Close()
-	return uint16(conn.LocalAddr().(*net.UDPAddr).Port)
-}
-
 func TestPickDERPFallback(t *testing.T) {
 	tstest.PanicOnLog()
 	tstest.ResourceCheck(t)
@@ -557,6 +556,8 @@ func TestDERPActiveFuncCalledAfterConnect(t *testing.T) {
 		EndpointsFunc: func([]tailcfg.Endpoint) {},
 		DERPActiveFunc: func() {
+			// derpStarted should already be closed when DERPActiveFunc is called.
+			// The goroutine calling this waits on startGate (= derpStarted for
+			// firstDerp), so by the time we get here, derpStarted must be closed.
 			select {
 			case <-conn.derpStarted:
 				resultCh <- true
@@ -575,8 +576,14 @@ func TestDERPActiveFuncCalledAfterConnect(t *testing.T) {
 		t.Fatal(err)
 	}

-	if ok := <-resultCh; !ok {
-		t.Error("DERPActiveFunc was called before DERP connection was established")
+	// Add a timeout to avoid hanging forever if DERPActiveFunc is never called.
+	select {
+	case ok := <-resultCh:
+		if !ok {
+			t.Error("DERPActiveFunc was called before DERP connection was established")
+		}
+	case <-time.After(10 * time.Second):
+		t.Fatal("timeout waiting for DERPActiveFunc to be called")
 	}
 }
@@ -1435,7 +1442,6 @@ func Test32bitAlignment(t *testing.T) {
 // newTestConn returns a new Conn.
 func newTestConn(t testing.TB) *Conn {
 	t.Helper()
-	port := pickPort(t)

 	bus := eventbustest.NewBus(t)

@@ -1445,6 +1451,7 @@ func newTestConn(t testing.TB) *Conn {
 	}
 	t.Cleanup(func() { netMon.Close() })

+	// Use port 0 to let the system assign a port, avoiding TOCTOU races.
 	conn, err := NewConn(Options{
 		NetMon:   netMon,
 		EventBus: bus,
@@ -1452,7 +1459,7 @@ func newTestConn(t testing.TB) *Conn {
 		Metrics:           new(usermetric.Registry),
 		DisablePortMapper: true,
 		Logf:              t.Logf,
-		Port:              port,
+		Port:              0,
 		TestOnlyPacketListener: localhostListener{},
 		EndpointsFunc: func(eps []tailcfg.Endpoint) {
 			t.Logf("endpoints: %q", eps)
@@ -870,6 +870,10 @@ func TestTCPForwardLimits_PerClient(t *testing.T) {
 		}
 	}

+	// Capture the initial value of the clientmetric before injecting packets,
+	// since it's a global counter that persists across test runs.
+	initialClientMetric := metricPerClientForwardLimit.Value()
+
 	// Inject the packet to start the TCP forward and wait until we have an
 	// in-flight outgoing connection.
 	mustInjectPacket()
@@ -901,9 +905,9 @@ func TestTCPForwardLimits_PerClient(t *testing.T) {
 		t.Errorf("got expvar metric %q=%s, want 1", metricName, v)
 	}

-	// client metric
-	if v := metricPerClientForwardLimit.Value(); v != 1 {
-		t.Errorf("got clientmetric limit metric=%d, want 1", v)
+	// client metric - check it increased by 1 from initial value
+	if v := metricPerClientForwardLimit.Value(); v != initialClientMetric+1 {
+		t.Errorf("got clientmetric limit metric=%d, want %d", v, initialClientMetric+1)
 	}
 }