From 15cba0a3f66fc2f5a481ec9d986f94e9a95d2935 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick
Date: Tue, 28 Apr 2026 21:29:05 +0000
Subject: [PATCH] tstest/natlab/vmtest: add TestDiscoKeyChange
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a vmtest that brings up two gokrazy nodes A and B behind two
One2OneNAT networks (so direct UDP works in both directions and any
slowness can't be blamed on NAT traversal), establishes a WireGuard
tunnel A → B with TSMP, then rotates B's disco key four times and
asserts that the data plane recovers in both directions after each
rotation. All pings are TSMP (the data-plane ping; disco pings would
not exercise the WireGuard tunnel itself). The five pings:

1. A → B (initial; brings up the tunnel; 30s budget)
2. B → A after rotate (LocalAPI rotate-disco-key debug action)
3. A → B after rotate (LocalAPI)
4. B → A after restart (SIGKILL; gokrazy supervisor respawns)
5. A → B after restart (SIGKILL)

Each post-rotation ping gets a 15-second budget. Two unavoidable
multi-second waits dominate today:

- The rotate-then-a→b phase takes ~10s on main because of LazyWG.
  After B's WantRunning bounce, B's wgengine resets its
  sentActivityAt/recvActivityAt maps and trims A out of the
  wireguard-go config as an "idle peer"; B only re-adds A on inbound
  activity, by which point A's first few TSMP packets have been
  silently dropped at B's tundev. The bradfitz/rm_lazy_wg branch
  removes that trimming entirely (verified locally: this phase drops
  to <100ms there).

- The restart phases take ~5s for wireguard-go's RekeyTimeout
  handshake retry. After SIGKILL+respawn the first WG handshake init
  from the restarted node sometimes goes into the void (likely the
  brief peer-removed window in the receiver's two-step
  maybeReconfigWireguardLocked reconfig during which the peer is
  absent from wireguard-go), and wg-go's 5s+jitter retransmit timer
  is the next opportunity to retry. That retry succeeds and the
  staged TSMP packet flushes. This is intrinsic to the protocol's
  retransmit policy.

Once LazyWG is removed and the first-handshake-after-reconfig race is
fixed, the budget should drop to 5s.

Supporting changes:

ipn/ipnlocal: DebugRotateDiscoKey now toggles WantRunning off and back
on after rotating the disco key. magicsock.Conn.RotateDiscoKey only
resets local disco state; without also dropping wireguard-go session
keys, peers keep encrypting with their stale per-peer session against
us until their rekey timer fires (WireGuard has no data-plane
signaling to invalidate sessions). Bouncing WantRunning runs the
engine through Reconfig(empty) → authReconfig, which drops every
peer's WG session so the next packet either way triggers a fresh
handshake.

ipn/ipnlocal, ipn/localapi: add a debug-only "peer-disco-keys"
LocalAPI action ([LocalBackend.DebugPeerDiscoKeys]) that returns a
map[NodePublic]DiscoPublic from the current netmap. Tests reach it
via [local.Client.DebugResultJSON]. We do not surface disco keys via
[ipnstate.PeerStatus] because adding a non-comparable
[key.DiscoPublic] field there breaks reflect-based test helpers
(e.g. TestFilterFormatAndSortExitNodes' use of cmp.Diff), and general
LocalAPI clients have no need for disco keys. Since the debug
LocalAPI is gated behind the ts_omit_debug build tag, this endpoint
is automatically stripped from small binaries.

cmd/tta: add /restart-tailscaled handler (Linux-only, via /proc walk)
to drive the SIGKILL phase. On gokrazy the supervisor respawns
tailscaled within a second.
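
For context, a test consumes the peer-disco-keys action described above
roughly as follows. This is an illustrative sketch only, not part of
this patch: it mirrors the Env.PeerDiscoKey helper added below and
assumes an already-connected tailscale.com/client/local Client (here
named "lc"), plus imports of context, encoding/json, and
tailscale.com/types/key:

	func peerDiscoKeys(ctx context.Context, lc *local.Client) (map[key.NodePublic]key.DiscoPublic, error) {
		// The debug action replies with JSON; DebugResultJSON decodes
		// it into a generic any.
		got, err := lc.DebugResultJSON(ctx, "peer-disco-keys")
		if err != nil {
			return nil, err
		}
		// Round-trip through encoding/json to turn the generic value
		// into a typed map (fine for a test helper).
		raw, err := json.Marshal(got)
		if err != nil {
			return nil, err
		}
		var m map[key.NodePublic]key.DiscoPublic
		if err := json.Unmarshal(raw, &m); err != nil {
			return nil, err
		}
		return m, nil
	}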

tstest/integration/testcontrol: add Server.AllOnline. When set, every
peer entry in MapResponses is marked Online=true. Several disco-key
handling fast paths in controlclient and wgengine
(removeUnwantedDiscoUpdates,
removeUnwantedDiscoUpdatesFromFullNetmapUpdate, the wgengine
tsmpLearnedDisco fast path) only fire for online peers; without this
flag, tests exercising disco-key rotation only hit the offline-peer
code paths, which mask issues and are several seconds slower in this
scenario. Finer-grained per-node online tracking can be added later.

tstest/natlab/vmtest: add Env.RotateDiscoKey, Env.RestartTailscaled,
Env.PeerDiscoKey, Node.Name, an [AllOnline] EnvOption that plumbs
through to testcontrol.Server.AllOnline, and an exported
Env.Ping(from, to, type, timeout). Ping replaces the unexported
helper so callers can specify both a ping type (PingDisco for warming
peer state, PingTSMP for asserting end-to-end connectivity) and a
deadline. PeerDiscoKey returns its LocalAPI error so callers inside
tstest.WaitFor can retry transient failures rather than fataling the
test.

Updates #12639
Updates #13038

Change-Id: I3644f27fc30e52990ba25a3983498cc582ddb958
Signed-off-by: Brad Fitzpatrick
---
 cmd/tta/restart_tailscaled_linux.go           |  47 ++++
 cmd/tta/tta.go                                |  17 ++
 ipn/ipnlocal/local.go                         |  36 +++
 ipn/localapi/debug.go                         |   6 +
 tstest/integration/testcontrol/testcontrol.go |  19 ++
 tstest/natlab/vmtest/vmtest.go                | 182 +++++++++++++-
 tstest/natlab/vmtest/vmtest_test.go           | 223 ++++++++++++++++++
 7 files changed, 519 insertions(+), 11 deletions(-)
 create mode 100644 cmd/tta/restart_tailscaled_linux.go

diff --git a/cmd/tta/restart_tailscaled_linux.go b/cmd/tta/restart_tailscaled_linux.go
new file mode 100644
index 000000000..accf2b404
--- /dev/null
+++ b/cmd/tta/restart_tailscaled_linux.go
@@ -0,0 +1,47 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+package main
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+)
+
+func init() {
+	restartTailscaled = restartTailscaledLinux
+}
+
+// restartTailscaledLinux finds the tailscaled process by walking /proc and
+// sends it SIGKILL. On gokrazy, the supervisor will restart tailscaled within
+// a few seconds. The PID of the process that was killed is returned.
+func restartTailscaledLinux() (int, error) {
+	ents, err := os.ReadDir("/proc")
+	if err != nil {
+		return 0, err
+	}
+	for _, e := range ents {
+		pid, err := strconv.Atoi(e.Name())
+		if err != nil {
+			continue
+		}
+		comm, err := os.ReadFile("/proc/" + e.Name() + "/comm")
+		if err != nil {
+			continue
+		}
+		if strings.TrimSpace(string(comm)) != "tailscaled" {
+			continue
+		}
+		proc, err := os.FindProcess(pid)
+		if err != nil {
+			return 0, err
+		}
+		if err := proc.Kill(); err != nil {
+			return 0, fmt.Errorf("killing tailscaled pid %d: %w", pid, err)
+		}
+		return pid, nil
+	}
+	return 0, fmt.Errorf("tailscaled process not found in /proc")
+}
diff --git a/cmd/tta/tta.go b/cmd/tta/tta.go
index 4d59b827b..5dd1eddb9 100644
--- a/cmd/tta/tta.go
+++ b/cmd/tta/tta.go
@@ -391,6 +391,18 @@ func main() {
 		}
 		wgServerUp(w, r)
 	})
+	ttaMux.HandleFunc("/restart-tailscaled", func(w http.ResponseWriter, r *http.Request) {
+		if restartTailscaled == nil {
+			http.Error(w, "restart-tailscaled not supported on this platform", http.StatusNotImplemented)
+			return
+		}
+		pid, err := restartTailscaled()
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		fmt.Fprintf(w, "killed tailscaled pid %d (supervisor will respawn)\n", pid)
+	})
 	ttaMux.HandleFunc("/logs", func(w http.ResponseWriter, r *http.Request) {
 		logBuf.mu.Lock()
 		defer logBuf.mu.Unlock()
@@ -600,6 +612,11 @@ var addFirewall func() error // set by fw_linux.go
 // non-Linux.
 var wgServerUp func(w http.ResponseWriter, r *http.Request)
 
+// restartTailscaled sends SIGKILL to the local tailscaled process so the
+// gokrazy supervisor restarts it. It is set by restart_tailscaled_linux.go
+// and is nil on non-Linux.
+var restartTailscaled func() (pid int, err error)
+
 // logBuffer is a bytes.Buffer that is safe for concurrent use
 // intended to capture early logs from the process, even if
 // gokrazy's syslog streaming isn't working or yet working.
diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go
index b97c43d59..5719fef91 100644
--- a/ipn/ipnlocal/local.go
+++ b/ipn/ipnlocal/local.go
@@ -7093,11 +7093,28 @@ func (b *LocalBackend) DebugRotateDiscoKey() error {
 
 	b.mu.Lock()
 	cc := b.cc
+	wantRunning := b.pm.CurrentPrefs().WantRunning()
 	b.mu.Unlock()
 
 	if cc != nil {
 		cc.SetDiscoPublicKey(newDiscoKey)
 	}
+	// Bounce WantRunning to fully reset wireguard-go state for all peers.
+	if wantRunning {
+		if _, err := b.EditPrefs(&ipn.MaskedPrefs{
+			Prefs:          ipn.Prefs{WantRunning: false},
+			WantRunningSet: true,
+		}); err != nil {
+			return err
+		}
+		if _, err := b.EditPrefs(&ipn.MaskedPrefs{
+			Prefs:          ipn.Prefs{WantRunning: true},
+			WantRunningSet: true,
+		}); err != nil {
+			return err
+		}
+	}
+
 	return nil
 }
 
@@ -7105,6 +7122,25 @@ func (b *LocalBackend) DebugPeerRelayServers() set.Set[netip.Addr] {
 	return b.MagicConn().PeerRelays()
 }
+// DebugPeerDiscoKeys returns the disco public keys this node has learned for
+// each of its peers from the most recent network map. Intended for tests
+// (the production [ipnstate.PeerStatus] purposefully does not surface disco
+// keys; surfacing them via the [ipnstate.Status] API would also pollute
+// every PeerStatus consumer with a non-comparable struct field).
+func (b *LocalBackend) DebugPeerDiscoKeys() map[key.NodePublic]key.DiscoPublic {
+	nm := b.currentNode().NetMap()
+	if nm == nil {
+		return nil
+	}
+	m := make(map[key.NodePublic]key.DiscoPublic, len(nm.Peers))
+	for _, p := range nm.Peers {
+		if dk := p.DiscoKey(); !dk.IsZero() {
+			m[p.Key()] = dk
+		}
+	}
+	return m
+}
+
 // ControlKnobs returns the node's control knobs.
 func (b *LocalBackend) ControlKnobs() *controlknobs.Knobs {
 	return b.sys.ControlKnobs()
 }
diff --git a/ipn/localapi/debug.go b/ipn/localapi/debug.go
index d8e46040d..dac39195a 100644
--- a/ipn/localapi/debug.go
+++ b/ipn/localapi/debug.go
@@ -232,6 +232,12 @@ func (h *Handler) serveDebug(w http.ResponseWriter, r *http.Request) {
 		if err == nil {
 			return
 		}
+	case "peer-disco-keys":
+		w.Header().Set("Content-Type", "application/json")
+		err = json.NewEncoder(w).Encode(h.b.DebugPeerDiscoKeys())
+		if err == nil {
+			return
+		}
 	case "rotate-disco-key":
 		err = h.b.DebugRotateDiscoKey()
 	case "statedir":
diff --git a/tstest/integration/testcontrol/testcontrol.go b/tstest/integration/testcontrol/testcontrol.go
index 97a193c51..95df1f5a6 100644
--- a/tstest/integration/testcontrol/testcontrol.go
+++ b/tstest/integration/testcontrol/testcontrol.go
@@ -69,6 +69,22 @@ type Server struct {
 	// belong to the same user.
 	AllNodesSameUser bool
 
+	// AllOnline, if true, marks every peer entry in MapResponses as
+	// Online=true. This is a coarse stand-in for the per-node
+	// online/offline tracking that production control servers do based
+	// on streaming map sessions: certain disco-key handling fast paths
+	// in [tailscale.com/control/controlclient] and
+	// [tailscale.com/wgengine] only fire when the peer is reported
+	// online, so without this flag they are silently skipped in tests,
+	// which can mask bugs and slow down recovery from disco rotations.
+	// See removeUnwantedDiscoUpdates and
+	// removeUnwantedDiscoUpdatesFromFullNetmapUpdate in
+	// [tailscale.com/control/controlclient] (map.go) for callers that
+	// branch on Online.
+	//
+	// Finer-grained per-node online tracking can be added later.
+	AllOnline bool
+
 	// DefaultNodeCapabilities overrides the capability map sent to each client.
 	DefaultNodeCapabilities *tailcfg.NodeCapMap
 
@@ -1405,6 +1421,9 @@ func (s *Server) MapResponse(req *tailcfg.MapRequest) (res *tailcfg.MapResponse,
 			p.PrimaryRoutes = routes
 			p.AllowedIPs = append(p.AllowedIPs, routes...)
 		}
+		if s.AllOnline {
+			p.Online = new(true)
+		}
 		res.Peers = append(res.Peers, p)
 	}
 
diff --git a/tstest/natlab/vmtest/vmtest.go b/tstest/natlab/vmtest/vmtest.go
index bfc935ac0..9cc72b931 100644
--- a/tstest/natlab/vmtest/vmtest.go
+++ b/tstest/natlab/vmtest/vmtest.go
@@ -19,6 +19,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/base64"
+	"encoding/json"
 	"flag"
 	"fmt"
 	"io"
@@ -43,6 +44,7 @@ import (
 	"tailscale.com/ipn"
 	"tailscale.com/ipn/ipnstate"
 	"tailscale.com/tailcfg"
+	"tailscale.com/tstest"
 	"tailscale.com/tstest/integration/testcontrol"
 	"tailscale.com/tstest/natlab/vnet"
 	"tailscale.com/types/key"
@@ -85,6 +87,7 @@ type Env struct {
 	qemuProcs []*exec.Cmd // launched QEMU processes
 
 	sameTailnetUser bool // all nodes register as the same Tailnet user
+	allOnline       bool // mark every peer as Online=true in MapResponses
 
 	// Shared resource initialization (sync.Once for things multiple nodes share).
 	vnetOnce sync.Once
@@ -346,6 +349,16 @@ func SameTailnetUser() EnvOption {
 	return envOptFunc(func(e *Env) { e.sameTailnetUser = true })
 }
 
+// AllOnline returns an [EnvOption] that makes the test control server mark
+// every peer as Online=true in MapResponses (testcontrol.Server.AllOnline).
+// Several disco-key handling fast paths in the controlclient and wgengine
+// only fire when the peer is reported online; without this option those
+// paths are silently skipped, which can mask bugs and slow down recovery
+// from disco-key rotations.
+func AllOnline() EnvOption {
+	return envOptFunc(func(e *Env) { e.allOnline = true })
+}
+
 // AddNetwork creates a new virtual network. Arguments follow the same pattern as
 // vnet.Config.AddNetwork (string IPs, NAT types, NetworkService values).
 func (e *Env) AddNetwork(opts ...any) *vnet.Network {
@@ -414,6 +427,11 @@ func (e *Env) AddNode(name string, opts ...any) *Node {
 
+// Name returns the node's name as set in [Env.AddNode].
+func (n *Node) Name() string {
+	return n.name
+}
+
 // LanIP returns the LAN IPv4 address of this node on the given network.
 // This is only valid after Env.Start() has been called.
 func (n *Node) LanIP(net *vnet.Network) netip.Addr {
 	return n.vnetNode.LanIP(net)
 }
@@ -864,33 +882,172 @@ func (e *Env) ApproveRoutes(n *Node, routes ...string) {
 	}
 }
 
-// ping pings from one node to another's Tailscale IP, retrying until it succeeds
-// or the timeout expires. This establishes the WireGuard tunnel between the nodes.
+// ping does a disco ping from one node to another's Tailscale IP, retrying
+// for up to 30 seconds, fataling on failure. It is used internally to wake
+// up magicsock peer state before a test runs; tests that want to assert
+// connectivity should use [Env.Ping] with the appropriate ping type and
+// timeout.
 func (e *Env) ping(from, to *Node) {
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	e.t.Helper()
+	if err := e.Ping(from, to, tailcfg.PingDisco, 30*time.Second); err != nil {
+		e.t.Fatal(err)
+	}
+}
+
+// Ping pings from one node to another's Tailscale IP using the given ping
+// type, retrying until it succeeds or timeout expires. It returns the error
+// from the last attempt if the timeout expires. Unlike the internal ping
+// helper, it does not fatal the test on failure; callers can check the error
+// to assert on timing.
+//
+// [tailcfg.PingTSMP] actually flows packets across the WireGuard tunnel and is
+// the right choice for asserting end-to-end connectivity.
+// [tailcfg.PingDisco] only exchanges disco messages between magicsock layers
+// and is useful for warming up peer state without requiring a working tunnel.
+func (e *Env) Ping(from, to *Node, ptype tailcfg.PingType, timeout time.Duration) error {
+	e.t.Helper()
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	defer cancel()
 
 	toSt, err := to.agent.Status(ctx)
 	if err != nil {
-		e.t.Fatalf("ping: can't get %s status: %v", to.name, err)
+		return fmt.Errorf("ping: can't get %s status: %w", to.name, err)
 	}
 	if len(toSt.Self.TailscaleIPs) == 0 {
-		e.t.Fatalf("ping: %s has no Tailscale IPs", to.name)
+		return fmt.Errorf("ping: %s has no Tailscale IPs", to.name)
 	}
 	targetIP := toSt.Self.TailscaleIPs[0]
 
+	var lastErr error
 	for {
-		pingCtx, pingCancel := context.WithTimeout(ctx, 3*time.Second)
-		pr, err := from.agent.PingWithOpts(pingCtx, targetIP, tailcfg.PingDisco, local.PingOpts{})
+		// Per-attempt timeout: cap at 3s but never exceed the remaining budget.
+		attemptTimeout := 3 * time.Second
+		if d := time.Until(deadline(ctx)); d < attemptTimeout {
+			attemptTimeout = d
+		}
+		if attemptTimeout <= 0 {
+			break
+		}
+		pingCtx, pingCancel := context.WithTimeout(ctx, attemptTimeout)
+		pr, err := from.agent.PingWithOpts(pingCtx, targetIP, ptype, local.PingOpts{})
 		pingCancel()
 		if err == nil && pr.Err == "" {
-			e.logVerbosef("ping: %s -> %s OK", from.name, targetIP)
-			return
+			e.logVerbosef("ping(%s): %s -> %s OK", ptype, from.name, targetIP)
+			return nil
+		}
+		switch {
+		case err != nil:
+			lastErr = err
+		case pr.Err != "":
+			lastErr = fmt.Errorf("%s", pr.Err)
 		}
 		if ctx.Err() != nil {
-			e.t.Fatalf("ping: %s -> %s timed out", from.name, targetIP)
+			break
 		}
-		time.Sleep(time.Second)
+		time.Sleep(500 * time.Millisecond)
+	}
+	if lastErr == nil {
+		lastErr = ctx.Err()
+	}
+	return fmt.Errorf("ping(%s): %s -> %s (%s) timed out after %v: %w", ptype, from.name, to.name, targetIP, timeout, lastErr)
+}
+
+// deadline returns ctx's deadline, or a zero Time if it has none.
+func deadline(ctx context.Context) time.Time {
+	d, _ := ctx.Deadline()
+	return d
+}
+
+// PeerDiscoKey returns n's view of the given peer's disco key. It returns a
+// non-nil error if the LocalAPI request fails (e.g. tailscaled briefly
+// unavailable during a restart). It returns (zero, false, nil) if n is
+// reachable but has no record of the given peer in its current netmap.
+//
+// PeerDiscoKey is suitable for use inside a [tstest.WaitFor] poll loop: it
+// does not fatal the test on transient errors.
+//
+// The disco key is fetched from the debug-only "peer-disco-keys" LocalAPI
+// action ([ipnlocal.LocalBackend.DebugPeerDiscoKeys]) rather than via
+// [ipnstate.Status], to keep the production PeerStatus struct free of disco
+// keys (and free of non-comparable fields like [key.DiscoPublic] that break
+// reflect-based test helpers).
+func (e *Env) PeerDiscoKey(n *Node, peer key.NodePublic) (key.DiscoPublic, bool, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	got, err := n.agent.DebugResultJSON(ctx, "peer-disco-keys")
+	if err != nil {
+		return key.DiscoPublic{}, false, err
+	}
+	// DebugResultJSON returns the result as a generic any (the body is
+	// re-decoded into any), so the map comes back keyed by string text-
+	// encoded node keys. Re-marshal+unmarshal into a typed map for cleaner
+	// lookup. (Roundtripping through JSON is fine for a test helper.)
+	raw, err := json.Marshal(got)
+	if err != nil {
+		return key.DiscoPublic{}, false, fmt.Errorf("re-marshal: %w", err)
+	}
+	var m map[key.NodePublic]key.DiscoPublic
+	if err := json.Unmarshal(raw, &m); err != nil {
+		return key.DiscoPublic{}, false, fmt.Errorf("unmarshal peer-disco-keys: %w", err)
+	}
+	d, ok := m[peer]
+	return d, ok, nil
+}
+
+// RotateDiscoKey asks tailscaled on n to rotate its discovery (magicsock) key
+// in place via the LocalAPI debug action. The node key, control connection,
+// and other tailscaled state are unaffected. It fatals the test on error.
+func (e *Env) RotateDiscoKey(n *Node) {
+	e.t.Helper()
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	if err := n.agent.DebugAction(ctx, "rotate-disco-key"); err != nil {
+		e.t.Fatalf("RotateDiscoKey(%s): %v", n.name, err)
+	}
+}
+
+// RestartTailscaled signals tailscaled on n to die so that its supervisor
+// (gokrazy) restarts it. It then waits for tailscaled to come back to the
+// "Running" backend state. It fatals the test on error.
+//
+// Restarting tailscaled is currently only supported on gokrazy nodes.
+func (e *Env) RestartTailscaled(n *Node) {
+	e.t.Helper()
+	if !n.os.IsGokrazy {
+		e.t.Fatalf("RestartTailscaled(%s): only supported on gokrazy nodes (have %q)", n.name, n.os.Name)
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, "GET", "http://unused/restart-tailscaled", nil)
+	if err != nil {
+		e.t.Fatalf("RestartTailscaled(%s): %v", n.name, err)
+	}
+	res, err := n.agent.HTTPClient.Do(req)
+	if err != nil {
+		e.t.Fatalf("RestartTailscaled(%s): %v", n.name, err)
+	}
+	body, _ := io.ReadAll(res.Body)
+	res.Body.Close()
+	if res.StatusCode != 200 {
+		e.t.Fatalf("RestartTailscaled(%s): %s: %s", n.name, res.Status, body)
+	}
+	e.t.Logf("[%s] %s", n.name, strings.TrimSpace(string(body)))
+
+	// Wait for tailscaled to come back. Status calls will fail while the unix
+	// socket is gone, then return Starting/NeedsLogin briefly before settling
+	// on Running.
+	if err := tstest.WaitFor(45*time.Second, func() error {
+		st, err := n.agent.Status(ctx)
+		if err != nil {
+			return err
+		}
+		if st.BackendState != "Running" {
+			return fmt.Errorf("backend state = %q", st.BackendState)
+		}
+		return nil
+	}); err != nil {
+		e.t.Fatalf("RestartTailscaled(%s): waiting for Running: %v", n.name, err)
 	}
 }
 
@@ -1094,6 +1251,9 @@ func (e *Env) initVnet() {
 		if e.sameTailnetUser {
 			e.server.ControlServer().AllNodesSameUser = true
 		}
+		if e.allOnline {
+			e.server.ControlServer().AllOnline = true
+		}
 	})
 }
 
diff --git a/tstest/natlab/vmtest/vmtest_test.go b/tstest/natlab/vmtest/vmtest_test.go
index 260bc2fd4..5521bd8bc 100644
--- a/tstest/natlab/vmtest/vmtest_test.go
+++ b/tstest/natlab/vmtest/vmtest_test.go
@@ -9,11 +9,14 @@ import (
 	"net/netip"
 	"strings"
 	"testing"
+	"time"
 
 	"tailscale.com/tailcfg"
+	"tailscale.com/tstest"
 	"tailscale.com/tstest/integration/testcontrol"
 	"tailscale.com/tstest/natlab/vmtest"
 	"tailscale.com/tstest/natlab/vnet"
+	"tailscale.com/types/key"
 )
 
 func TestMacOSAndLinuxCanPing(t *testing.T) {
@@ -534,6 +537,226 @@ func TestExitNode(t *testing.T) {
 	}
 }
 
+// TestDiscoKeyChange verifies that when one node's disco key rotates without
+// its WireGuard node key changing, peers detect the change, tear down stale
+// WireGuard session state for that peer, and re-establish the tunnel in both
+// directions. This exercises the disco-key-change handling that the
+// bradfitz/rm_lazy_wg branch relies on for traffic to and from a peer whose
+// magicsock state has been reset.
+//
+// Topology: two gokrazy nodes A and B, each on its own One2OneNAT network so
+// every connection between them is a direct UDP path with no port-mapping or
+// filtering. With NAT effects out of the way, what we measure here is the
+// speed of disco-key-change reconciliation in wgengine/magicsock alone. The
+// test control server is also configured with [testcontrol.Server.AllOnline]
+// (via [vmtest.AllOnline]) so the controlclient/wgengine fast paths that
+// branch on Online actually fire — without that flag the test exercises
+// only the offline-peer code paths, which mask separate latent issues and
+// are several seconds slower.
+//
+// The test runs four B-side rotations, each followed by a TSMP ping in the
+// requested direction:
+//
+//	rotate (LocalAPI rotate-disco-key) → ping B → A
+//	rotate (LocalAPI rotate-disco-key) → ping A → B
+//	restart (SIGKILL tailscaled)       → ping B → A
+//	restart (SIGKILL tailscaled)       → ping A → B
+//
+// Plus an initial A→B TSMP ping with a generous 30s budget to bring up the
+// WireGuard tunnel before the rotations begin (so the post-rotation pings
+// measure stale-state recovery, not first-time setup). All pings are TSMP
+// because TSMP traverses the actual WireGuard data plane; PingDisco only
+// exercises the magicsock disco layer and would mask any stale WG session
+// problems.
+//
+// Two rotation methods are exercised:
+//
+//   - LocalAPI rotate-disco-key (debug action): rolls B's magicsock disco
+//     private key in place, then bounces WantRunning to force wgengine to
+//     drop wireguard-go session keys for every peer (RotateDiscoKey alone
+//     only touches local disco state; without the WantRunning bounce, B
+//     keeps using stale per-peer session keys against A and A drops
+//     everything until B's WG rekey timer eventually fires).
+//   - SIGKILL of tailscaled (via TTA's /restart-tailscaled): the gokrazy
+//     supervisor respawns tailscaled, fully resetting B's magicsock and
+//     wgengine state in addition to rotating the disco key.
+//
+// Each post-rotation ping currently gets a 15-second budget. On a
+// hypothetical perfect build it should take well under a second. In
+// practice today there are two unavoidable multi-second waits:
+//
+//   - The rotate-then-a→b phase on main takes ~10s because of LazyWG.
+//     After B's WantRunning bounce, B's wgengine resets its sentActivityAt/
+//     recvActivityAt maps and trims A out of the wireguard-go config
+//     as an "idle peer"; B only re-adds A on inbound activity, by
+//     which point A's first few TSMP packets have been silently
+//     dropped at B's tundev. The bradfitz/rm_lazy_wg branch removes
+//     that trimming entirely (verified locally), so this phase will
+//     drop to <100ms once that branch lands.
+//
+//   - The restart phases take ~5s for the wireguard-go handshake retry
+//     timer. After SIGKILL+respawn the first WG handshake init from
+//     the restarted node sometimes goes into the void (likely the
+//     brief peer-removed window in the receiver's two-step
+//     [wgengine.userspaceEngine.maybeReconfigWireguardLocked] reconfig
+//     during which the peer is absent from wireguard-go), and wg-go's
+//     [device.RekeyTimeout] of 5s + jitter is the next opportunity to
+//     retry. That retry succeeds and the staged TSMP packet flushes.
+//     This is intrinsic to the protocol's retransmit policy.
+//
+// Once LazyWG is removed and the first-handshake-after-reconfig race
+// is fixed, this budget should be tightened to 5s (or less).
+//
+// All four rotations also assert that B's WireGuard node key is unchanged.
+func TestDiscoKeyChange(t *testing.T) {
+	// AllOnline makes the test control server mark every peer as Online=true
+	// in its MapResponses. Several disco-key handling fast paths
+	// (controlclient.removeUnwantedDiscoUpdates,
+	// removeUnwantedDiscoUpdatesFromFullNetmapUpdate, and the wgengine
+	// tsmpLearnedDisco fast path) only fire for online peers. Production
+	// control servers always populate Online; without this flag the test
+	// would only exercise the offline-peer paths.
+	env := vmtest.New(t, vmtest.AllOnline())
+
+	// One2OneNAT so each node has a 1:1 mapping to a public WAN IP with no
+	// port-translation or address-port filtering. This makes A↔B traffic
+	// behave like two unfirewalled hosts on the public internet, so any
+	// slowness we observe in this test cannot be blamed on NAT traversal.
+	aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.One2OneNAT)
+	bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.One2OneNAT)
+
+	a := env.AddNode("a", aNet, vmtest.OS(vmtest.Gokrazy))
+	b := env.AddNode("b", bNet, vmtest.OS(vmtest.Gokrazy))
+
+	type phase struct {
+		name      string
+		rotate    func()
+		pingFrom  *vmtest.Node
+		pingTo    *vmtest.Node
+		applyStep *vmtest.Step
+		verify    *vmtest.Step
+		wait      *vmtest.Step
+		ping      *vmtest.Step
+	}
+	phases := []*phase{
+		{name: "rotate (LocalAPI), b → a", pingFrom: b, pingTo: a, rotate: func() { env.RotateDiscoKey(b) }},
+		{name: "rotate (LocalAPI), a → b", pingFrom: a, pingTo: b, rotate: func() { env.RotateDiscoKey(b) }},
+		{name: "restart, b → a", pingFrom: b, pingTo: a, rotate: func() { env.RestartTailscaled(b) }},
+		{name: "restart, a → b", pingFrom: a, pingTo: b, rotate: func() { env.RestartTailscaled(b) }},
+	}
+
+	pingABStep := env.AddStep("Ping a → b TSMP (establish tunnel)")
+	for _, p := range phases {
+		p.applyStep = env.AddStep("Apply: " + p.name)
+		p.verify = env.AddStep("Verify b: same node key, new disco key (" + p.name + ")")
+		p.wait = env.AddStep("Wait for a to see b's new disco key (" + p.name + ")")
+		p.ping = env.AddStep("Ping " + p.pingFrom.Name() + " → " + p.pingTo.Name() + " TSMP (" + p.name + ")")
+	}
+
+	env.Start()
+
+	pingABStep.Begin()
+	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
+		pingABStep.End(err)
+		t.Fatal(err)
+	}
+	pingABStep.End(nil)
+
+	bStInitial := env.Status(b)
+	bNodeKey := bStInitial.Self.PublicKey
+	cs := env.ControlServer()
+	bCtlNode := cs.Node(bNodeKey)
+	if bCtlNode == nil {
+		t.Fatalf("control server has no node for b's key %v", bNodeKey)
+	}
+	prevDisco := bCtlNode.DiscoKey
+	if prevDisco.IsZero() {
+		t.Fatalf("control server has no disco key for b before rotation")
+	}
+	t.Logf("[b] initial: nodekey=%s discokey=%s", bNodeKey.ShortString(), prevDisco.ShortString())
+
+	for _, p := range phases {
+		p.applyStep.Begin()
+		p.rotate()
+		p.applyStep.End(nil)
+		prevDisco = checkDiscoRotated(t, env, a, b, p.pingFrom, p.pingTo, bNodeKey, prevDisco, p.name,
+			p.verify, p.wait, p.ping)
+	}
+}
+
+// checkDiscoRotated verifies that after some action that should have rotated
+// b's disco key, control has learned the new key, b's node key is unchanged,
+// a's local view picks up the new disco key, and pingFrom can ping pingTo
+// (TSMP) within the budget. It returns b's new disco key and fatals on
+// failure.
+//
+// The TSMP ping budget is 15 seconds rather than the few hundred ms it
+// ought to take. See the top-level test docstring for a full breakdown:
+// it has to absorb LazyWG's trim+re-add for the rotate-a→b phase (~10s)
+// and wireguard-go's RekeyTimeout retry for the SIGKILL+restart phases
+// (~5s). Tighten this once both are addressed.
+func checkDiscoRotated(t *testing.T, env *vmtest.Env, a, b, pingFrom, pingTo *vmtest.Node, bNodeKey key.NodePublic, oldDisco key.DiscoPublic, label string, verifyStep, waitStep, pingStep *vmtest.Step) key.DiscoPublic {
+	t.Helper()
+	cs := env.ControlServer()
+
+	verifyStep.Begin()
+	bSt := env.Status(b)
+	if got := bSt.Self.PublicKey; got != bNodeKey {
+		err := fmt.Errorf("[%s] b's node key changed: %v -> %v", label, bNodeKey, got)
+		verifyStep.End(err)
+		t.Fatal(err)
+	}
+	var newDisco key.DiscoPublic
+	if err := tstest.WaitFor(15*time.Second, func() error {
+		n := cs.Node(bNodeKey)
+		if n == nil {
+			return fmt.Errorf("control server has no node for b")
+		}
+		if n.DiscoKey.IsZero() || n.DiscoKey == oldDisco {
+			return fmt.Errorf("control still has old disco key %v for b", n.DiscoKey)
+		}
+		newDisco = n.DiscoKey
+		return nil
+	}); err != nil {
+		verifyStep.End(err)
+		t.Fatalf("[%s] %v", label, err)
+	}
+	t.Logf("[b] after %s: nodekey=%s discokey=%s", label, bNodeKey.ShortString(), newDisco.ShortString())
+	verifyStep.End(nil)
+
+	waitStep.Begin()
+	if err := tstest.WaitFor(30*time.Second, func() error {
+		d, ok, err := env.PeerDiscoKey(a, bNodeKey)
+		if err != nil {
+			return err
+		}
+		if !ok {
+			return fmt.Errorf("a doesn't yet have b in its status")
+		}
+		if d != newDisco {
+			return fmt.Errorf("a still sees b's old disco %v, want %v", d.ShortString(), newDisco.ShortString())
+		}
+		return nil
+	}); err != nil {
+		waitStep.End(err)
+		env.DumpStatus(a)
+		t.Fatalf("[%s] %v", label, err)
+	}
+	waitStep.End(nil)
+
+	pingStep.Begin()
+	t0 := time.Now()
+	if err := env.Ping(pingFrom, pingTo, tailcfg.PingTSMP, 15*time.Second); err != nil {
+		pingStep.End(err)
+		env.DumpStatus(a)
+		env.DumpStatus(b)
+		t.Fatalf("[%s] %v", label, err)
+	}
+	t.Logf("[%s] ping %s -> %s succeeded in %v", label, pingFrom.Name(), pingTo.Name(), time.Since(t0).Round(100*time.Millisecond))
+	pingStep.End(nil)
+	return newDisco
+}
+
 // TestMullvadExitNode verifies that a Tailscale client whose netmap contains
 // a plain-WireGuard exit node (the way Mullvad exit nodes are wired up by
 // the control plane) can route internet traffic through it, with the source