tstest/natlab/vmtest: add test loading netmap cache from disk (#19598)

For testing the loading of netmap cache from disk, the cache needs to
exist. The simple solution is to start two nodes and connect them to
control, with the netmap caching capability set. Then cut the connection
to control, restart the nodes, and ping between them.

This tests that we can start from a cache and get to running state, but
also that we are able to establish a connection between the nodes.

For now, this does not test how the nodes talk to each other
(DERP vs. direct), only that they can.

Updates #19597

Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
Claus Lensbøl 2026-05-01 09:46:19 -04:00 committed by GitHub
parent 89a78dc9b7
commit ff9c3f0e00
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 117 additions and 0 deletions

View File

@ -162,6 +162,10 @@ type Server struct {
// tkaStorage records the Tailnet Lock state, if any.
// If nil, Tailnet Lock is not enabled in the Tailnet.
tkaStorage tka.CompactableChonk
// onMapRequest, if non-nil, is called at the start of each map poll request.
// It can be used in tests to panic or fail if a node contacts control unexpectedly.
onMapRequest func(nodeKey key.NodePublic)
}
// BaseURL returns the server's base URL, without trailing slash.
@ -1169,6 +1173,12 @@ func (s *Server) serveMap(w http.ResponseWriter, r *http.Request, mkey key.Machi
go panic(fmt.Sprintf("bad map request: %v", err))
}
s.mu.Lock()
if s.onMapRequest != nil {
s.onMapRequest(req.NodeKey)
}
s.mu.Unlock()
if s.AltMapStream != nil {
// The caller takes over the stream entirely; it must handle
// keeping the HTTP response alive until ctx is done.
@ -1620,6 +1630,15 @@ func (s *Server) encode(compress bool, v any) (b []byte, err error) {
return b, nil
}
// SetOnMapRequest registers f to be invoked at the start of every map
// poll request (the call happens while s.mu is held). It exists for
// tests that want to fail or panic if a node contacts control
// unexpectedly. Passing nil removes a previously installed callback.
func (s *Server) SetOnMapRequest(f func(key.NodePublic)) {
	s.mu.Lock()
	s.onMapRequest = f
	s.mu.Unlock()
}
// filterInvalidIPv6Endpoints removes invalid IPv6 endpoints from eps,
// modify the slice in place, returning the potentially smaller subset (aliasing
// the original memory).

View File

@ -586,6 +586,31 @@ func (e *Env) Start() {
if st2.BackendState != "Running" {
return fmt.Errorf("[%s] state = %q, want Running", n.name, st2.BackendState)
}
// Apply any capabilities for the node to the map.
// SetNodeCapMap pushes an updated map response immediately, then wait
// until the node reports the capability in its status.
if cm := n.vnetNode.WantCapMap(); cm != nil {
e.server.ControlServer().SetNodeCapMap(st2.Self.PublicKey, cm)
if err := tstest.WaitFor(15*time.Second, func() error {
st, err := n.agent.Status(ctx)
if err != nil {
return err
}
if st.Self == nil {
return fmt.Errorf("self is nil")
}
for c := range cm {
if !st.Self.HasCap(c) {
return fmt.Errorf("cap %v not yet received", c)
}
}
return nil
}); err != nil {
return fmt.Errorf("[%s] waiting for capabilities: %w", n.name, err)
}
}
ips := fmt.Sprintf("%v", st2.Self.TailscaleIPs)
e.setNodeTailscale(n.name, "Running "+ips)
t.Logf("[%s] up with %v", n.name, st2.Self.TailscaleIPs)

View File

@ -11,12 +11,14 @@ import (
"testing"
"time"
"tailscale.com/client/local"
"tailscale.com/tailcfg"
"tailscale.com/tstest"
"tailscale.com/tstest/integration/testcontrol"
"tailscale.com/tstest/natlab/vmtest"
"tailscale.com/tstest/natlab/vnet"
"tailscale.com/types/key"
"tailscale.com/types/netmap"
)
func TestMacOSAndLinuxCanPing(t *testing.T) {
@ -905,3 +907,74 @@ func TestMullvadExitNode(t *testing.T) {
env.SetExitNodeIP(client, netip.Addr{})
check(checkOff2Step, "exit-off (again)", clientWAN)
}
// TestCachedNetmapAfterRestart verifies that two nodes with netmap
// caching enabled (NodeAttrCacheNetworkMaps) can re-establish a direct
// WireGuard tunnel after both are restarted while the control server is
// unreachable. After restart the nodes must use only their on-disk cached
// netmaps to re-connect.
//
// Note: this only checks that the post-restart ping succeeds; it does
// not check HOW the nodes reach each other (DERP vs direct).
func TestCachedNetmapAfterRestart(t *testing.T) {
	env := vmtest.New(t)
	// Each node sits on its own LAN behind an EasyNAT.
	aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT)
	bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT)
	// Arm the control-traffic blackhole; it takes effect when
	// PostConnectedToControl is called further down.
	// NOTE(review): exact semantics of SetPostConnectControlBlackhole are
	// defined in vnet, not visible here — presumed from the method names.
	aNet.SetPostConnectControlBlackhole(true)
	bNet.SetPostConnectControlBlackhole(true)
	// Both nodes request NodeAttrCacheNetworkMaps so tailscaled writes
	// its netmap cache to disk while control is still reachable.
	a := env.AddNode("a", aNet,
		vmtest.OS(vmtest.Gokrazy),
		tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})
	b := env.AddNode("b", bNet,
		vmtest.OS(vmtest.Gokrazy),
		tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})
	// Named steps for test progress/diagnostics reporting.
	connectStep := env.AddStep("Establish initial TSMP tunnel")
	cutControlStep := env.AddStep("Cut control server access")
	restartStep := env.AddStep("Restart tailscaled on both nodes")
	netmapCheckStep := env.AddStep("Check netmap loaded is cached")
	pingStep := env.AddStep("Ping a → b TSMP (cached netmap, no control)")
	env.Start()
	// Phase 1: with control reachable, make sure a tunnel works at all
	// (and, as a side effect, that both nodes have a netmap to cache).
	connectStep.Begin()
	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
		connectStep.End(err)
		t.Fatal(err)
	}
	connectStep.End(nil)
	// Phase 2: cut control. Any map request reaching control after this
	// point is a bug, so the callback panics with the offending node key.
	cutControlStep.Begin()
	aNet.PostConnectedToControl()
	bNet.PostConnectedToControl()
	env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) {
		panic(fmt.Sprintf("got connection from %v", nk))
	})
	cutControlStep.End(nil)
	// Phase 3: restart both tailscaleds; they must come back up using
	// only the on-disk netmap cache.
	restartStep.Begin()
	env.RestartTailscaled(a)
	env.RestartTailscaled(b)
	restartStep.End(nil)
	// Phase 4: confirm each node actually loaded its netmap from the
	// cache (NetworkMap.Cached) rather than from a fresh control poll.
	netmapCheckStep.Begin()
	for _, node := range []*vmtest.Node{a, b} {
		nm, err := local.GetDebugResultJSON[netmap.NetworkMap](t.Context(), node.Agent().Client, "current-netmap")
		if err != nil {
			netmapCheckStep.End(fmt.Errorf("[%s] got err fetching netmap %q", node.Name(), err))
			t.Fatalf("[%s] got err fetching netmap %q", node.Name(), err)
		}
		if !nm.Cached {
			netmapCheckStep.End(fmt.Errorf("[%s] expected netmap.Cached = true, got: %t", node.Name(), nm.Cached))
			t.Fatalf("[%s] expected netmap.Cached = true, got: %t", node.Name(), nm.Cached)
		}
	}
	netmapCheckStep.End(nil)
	// Phase 5: the nodes must still reach each other over Tailscale,
	// now with control unreachable.
	pingStep.Begin()
	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
		pingStep.End(err)
		t.Fatal(err)
	}
	pingStep.End(nil)
}