diff --git a/tstest/integration/testcontrol/testcontrol.go b/tstest/integration/testcontrol/testcontrol.go
index 0fdc885a0..c96b1ed33 100644
--- a/tstest/integration/testcontrol/testcontrol.go
+++ b/tstest/integration/testcontrol/testcontrol.go
@@ -162,6 +162,11 @@ type Server struct {
 	// tkaStorage records the Tailnet Lock state, if any.
 	// If nil, Tailnet Lock is not enabled in the Tailnet.
 	tkaStorage tka.CompactableChonk
+
+	// onMapRequest, if non-nil, is called at the start of each map poll request.
+	// It can be used in tests to panic or fail if a node contacts control unexpectedly.
+	// It is guarded by mu.
+	onMapRequest func(nodeKey key.NodePublic)
 }
 
 // BaseURL returns the server's base URL, without trailing slash.
@@ -1169,6 +1174,16 @@ func (s *Server) serveMap(w http.ResponseWriter, r *http.Request, mkey key.Machi
 		go panic(fmt.Sprintf("bad map request: %v", err))
 	}
 
+	// Snapshot the callback under s.mu but invoke it after unlocking, so
+	// that a callback that panics or calls back into the Server cannot
+	// leave s.mu held.
+	s.mu.Lock()
+	onMapRequest := s.onMapRequest
+	s.mu.Unlock()
+	if onMapRequest != nil {
+		onMapRequest(req.NodeKey)
+	}
+
 	if s.AltMapStream != nil {
 		// The caller takes over the stream entirely; it must handle
 		// keeping the HTTP response alive until ctx is done.
@@ -1620,6 +1635,16 @@ func (s *Server) encode(compress bool, v any) (b []byte, err error) {
 	return b, nil
 }
 
+// SetOnMapRequest sets a callback, for use in tests, that is invoked at the
+// start of each map poll request with the requesting node's key. The
+// callback runs without s.mu held. Pass nil to remove the callback.
+func (s *Server) SetOnMapRequest(f func(key.NodePublic)) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.onMapRequest = f
+}
+
 // filterInvalidIPv6Endpoints removes invalid IPv6 endpoints from eps,
 // modify the slice in place, returning the potentially smaller subset (aliasing
 // the original memory).
diff --git a/tstest/natlab/vmtest/vmtest.go b/tstest/natlab/vmtest/vmtest.go
index 9cc72b931..9b029a119 100644
--- a/tstest/natlab/vmtest/vmtest.go
+++ b/tstest/natlab/vmtest/vmtest.go
@@ -586,6 +586,31 @@ func (e *Env) Start() {
 			if st2.BackendState != "Running" {
 				return fmt.Errorf("[%s] state = %q, want Running", n.name, st2.BackendState)
 			}
+
+			// Apply any capabilities for the node to the map.
+			// SetNodeCapMap pushes an updated map response immediately; then wait
+			// until the node reports every requested capability in its status.
+			if cm := n.vnetNode.WantCapMap(); cm != nil {
+				e.server.ControlServer().SetNodeCapMap(st2.Self.PublicKey, cm)
+				if err := tstest.WaitFor(15*time.Second, func() error {
+					st, err := n.agent.Status(ctx)
+					if err != nil {
+						return err
+					}
+					if st.Self == nil {
+						return fmt.Errorf("self is nil")
+					}
+					for c := range cm {
+						if !st.Self.HasCap(c) {
+							return fmt.Errorf("cap %v not yet received", c)
+						}
+					}
+					return nil
+				}); err != nil {
+					return fmt.Errorf("[%s] waiting for capabilities: %w", n.name, err)
+				}
+			}
+
 			ips := fmt.Sprintf("%v", st2.Self.TailscaleIPs)
 			e.setNodeTailscale(n.name, "Running "+ips)
 			t.Logf("[%s] up with %v", n.name, st2.Self.TailscaleIPs)
diff --git a/tstest/natlab/vmtest/vmtest_test.go b/tstest/natlab/vmtest/vmtest_test.go
index 5521bd8bc..cadf570d1 100644
--- a/tstest/natlab/vmtest/vmtest_test.go
+++ b/tstest/natlab/vmtest/vmtest_test.go
@@ -11,12 +11,14 @@ import (
 	"testing"
 	"time"
 
+	"tailscale.com/client/local"
 	"tailscale.com/tailcfg"
 	"tailscale.com/tstest"
 	"tailscale.com/tstest/integration/testcontrol"
 	"tailscale.com/tstest/natlab/vmtest"
 	"tailscale.com/tstest/natlab/vnet"
 	"tailscale.com/types/key"
+	"tailscale.com/types/netmap"
 )
 
 func TestMacOSAndLinuxCanPing(t *testing.T) {
@@ -905,3 +907,79 @@ func TestMullvadExitNode(t *testing.T) {
 	env.SetExitNodeIP(client, netip.Addr{})
 	check(checkOff2Step, "exit-off (again)", clientWAN)
 }
+
+// TestCachedNetmapAfterRestart verifies that two nodes with netmap
+// caching enabled (NodeAttrCacheNetworkMaps) can re-establish a direct
+// WireGuard tunnel after both are restarted while the control server is
+// unreachable. After restart the nodes must use only their on-disk cached
+// netmaps to re-connect.
+func TestCachedNetmapAfterRestart(t *testing.T) {
+	env := vmtest.New(t)
+
+	aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT)
+	bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT)
+
+	aNet.SetPostConnectControlBlackhole(true)
+	bNet.SetPostConnectControlBlackhole(true)
+
+	a := env.AddNode("a", aNet,
+		vmtest.OS(vmtest.Gokrazy),
+		tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})
+	b := env.AddNode("b", bNet,
+		vmtest.OS(vmtest.Gokrazy),
+		tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})
+
+	connectStep := env.AddStep("Establish initial TSMP tunnel")
+	cutControlStep := env.AddStep("Cut control server access")
+	restartStep := env.AddStep("Restart tailscaled on both nodes")
+	netmapCheckStep := env.AddStep("Check netmap loaded is cached")
+	pingStep := env.AddStep("Ping a → b TSMP (cached netmap, no control)")
+
+	env.Start()
+
+	connectStep.Begin()
+	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
+		connectStep.End(err)
+		t.Fatal(err)
+	}
+	connectStep.End(nil)
+
+	cutControlStep.Begin()
+	aNet.PostConnectedToControl()
+	bNet.PostConnectedToControl()
+	env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) {
+		// The callback runs inside the control server's HTTP map handler,
+		// where net/http recovers handler panics. Panic on a fresh
+		// goroutine so an unexpected map request actually aborts the test.
+		go panic(fmt.Sprintf("got connection from %v", nk))
+	})
+	cutControlStep.End(nil)
+
+	restartStep.Begin()
+	env.RestartTailscaled(a)
+	env.RestartTailscaled(b)
+	restartStep.End(nil)
+
+	netmapCheckStep.Begin()
+	for _, node := range []*vmtest.Node{a, b} {
+		nm, err := local.GetDebugResultJSON[netmap.NetworkMap](t.Context(), node.Agent().Client, "current-netmap")
+		if err != nil {
+			err = fmt.Errorf("[%s] got err fetching netmap %q", node.Name(), err)
+			netmapCheckStep.End(err)
+			t.Fatal(err)
+		}
+		if !nm.Cached {
+			err := fmt.Errorf("[%s] expected netmap.Cached = true, got: %t", node.Name(), nm.Cached)
+			netmapCheckStep.End(err)
+			t.Fatal(err)
+		}
+	}
+	netmapCheckStep.End(nil)
+
+	pingStep.Begin()
+	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
+		pingStep.End(err)
+		t.Fatal(err)
+	}
+	pingStep.End(nil)
+}