From 51f7c84f2e93bd2a00bc14870adfa16929abe2a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claus=20Lensb=C3=B8l?= Date: Wed, 4 Feb 2026 13:36:31 -0500 Subject: [PATCH] wgengine/magicsock: do not overwrite discokey when key has been used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a client starts up without being able to connect to control, it sends its discoKey to other nodes it wants to communicate with over TSMP. This disco key will be a newer key than the one control knows about. If the client that can connect to control gets a full netmap, ensure that the disco key for the node not connected to control is not overwritten with the stale key control knows about. Updates #12639 Signed-off-by: Claus Lensbøl --- wgengine/magicsock/endpoint.go | 40 ++++++++++++++++++++++++++--- wgengine/magicsock/endpoint_test.go | 31 ++++++++++++++++++++++ wgengine/magicsock/magicsock.go | 15 +++++------ 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index 1f99f57ec..c8359ef5f 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -17,6 +17,7 @@ import ( "reflect" "runtime" "slices" + "sync" "sync/atomic" "time" @@ -90,6 +91,13 @@ type endpoint struct { endpointState map[netip.AddrPort]*endpointState // netip.AddrPort type for key (instead of [epAddr]) as [endpointState] is irrelevant for Geneve-encapsulated paths isCallMeMaybeEP map[netip.AddrPort]bool + // We save the previous discoKeys to ensure that control is not overwriting a + // newer received via TSMP. We need to store multiple previous keys as we + // could have the endpoint restart multiple times while not connected to control. + previousDiscoKeys map[string]bool + + initializeEndpoint sync.Once + // The following fields are related to the new "silent disco" // implementation that's a WIP as of 2022-10-20. // See #540 for background. @@ -1465,6 +1473,32 @@ func (de *endpoint) setLastPing(ipp netip.AddrPort, now mono.Time) { state.lastPing = now } +// updateDiscoKeyLocked takes in a new discoKey for the endpoint to be updated. +// It ensures that the new key is not a previously held key. This prevents +// control from overwriting a key set by TSMP in a case where the endpoint +// represented by de is unable to contact control and has shared its disco key +// via TSMP. If key is a previously held key, this method is a noop. de.mu must +// be held while calling. +func (de *endpoint) updateDiscoKeyLocked(key *key.DiscoPublic) { + de.initializeEndpoint.Do(func() { + de.previousDiscoKeys = make(map[string]bool) + }) + var epDisco *endpointDisco + if key != nil { + if _, ok := de.previousDiscoKeys[key.String()]; ok { + return + } + epDisco = &endpointDisco{ + key: *key, + short: key.ShortString(), + } + if de.disco.Load() != nil { + de.previousDiscoKeys[de.disco.Load().key.String()] = true + } + } + de.disco.Store(epDisco) +} + // updateFromNode updates the endpoint based on a tailcfg.Node from a NetMap // update. func (de *endpoint) updateFromNode(n tailcfg.NodeView, heartbeatDisabled bool, probeUDPLifetimeEnabled bool) { @@ -1490,10 +1524,8 @@ func (de *endpoint) updateFromNode(n tailcfg.NodeView, heartbeatDisabled bool, p if discoKey != n.DiscoKey() { de.c.logf("[v1] magicsock: disco: node %s changed from %s to %s", de.publicKey.ShortString(), discoKey, n.DiscoKey()) - de.disco.Store(&endpointDisco{ - key: n.DiscoKey(), - short: n.DiscoKey().ShortString(), - }) + key := n.DiscoKey() + de.updateDiscoKeyLocked(&key) de.debugUpdates.Add(EndpointChange{ When: time.Now(), What: "updateFromNode-resetLocked", diff --git a/wgengine/magicsock/endpoint_test.go b/wgengine/magicsock/endpoint_test.go index 43ff012c7..c77043114 100644 --- a/wgengine/magicsock/endpoint_test.go +++ b/wgengine/magicsock/endpoint_test.go @@ -453,3 +453,34 @@ func Test_endpoint_udpRelayEndpointReady(t *testing.T) { }) } } + +func TestUpdateDiscoKey(t *testing.T) { + t.Run("SetKey", func(t *testing.T) { + de := &endpoint{} + newKey := key.NewDisco().Public() + de.updateDiscoKeyLocked(&newKey) + if newKey.Compare(de.disco.Load().key) != 0 { + t.Errorf("disco keys not equal, expected %v, got %v", newKey, de.disco.Load().key) + } + }) + + t.Run("SetNilKey", func(t *testing.T) { + de := &endpoint{} + de.updateDiscoKeyLocked(nil) + if de.disco.Load() != nil { + t.Errorf("disco keys not equal, expected %v, got %v", nil, de.disco.Load().key) + } + }) + + t.Run("SetPreviouslySetKey", func(t *testing.T) { + de := &endpoint{} + oldKey := key.NewDisco().Public() + newKey := key.NewDisco().Public() + de.updateDiscoKeyLocked(&oldKey) + de.updateDiscoKeyLocked(&newKey) + de.updateDiscoKeyLocked(&oldKey) // <- Should not change the key + if newKey.Compare(de.disco.Load().key) != 0 { + t.Errorf("disco keys not equal, expected %v, got %v", newKey, de.disco.Load().key) + } + }) +} diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index d6f411f4a..563a484d2 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -3180,14 +3180,14 @@ func (c *Conn) updateNodes(update NodeViewsUpdate) (peersChanged bool) { ep.nodeAddr = n.Addresses().At(0).Addr() } ep.initFakeUDPAddr() + ep.mu.Lock() if n.DiscoKey().IsZero() { - ep.disco.Store(nil) + ep.updateDiscoKeyLocked(nil) } else { - ep.disco.Store(&endpointDisco{ - key: n.DiscoKey(), - short: n.DiscoKey().ShortString(), - }) + key := n.DiscoKey() + ep.updateDiscoKeyLocked(&key) } + ep.mu.Unlock() if debugPeerMap() { c.logEndpointCreated(n) @@ -4321,10 +4321,7 @@ func (c *Conn) HandleDiscoKeyAdvertisement(node tailcfg.NodeView, update packet. return } c.discoInfoForKnownPeerLocked(discoKey) - ep.disco.Store(&endpointDisco{ - key: discoKey, - short: discoKey.ShortString(), - }) + ep.updateDiscoKeyLocked(&discoKey) c.peerMap.upsertEndpoint(ep, oldDiscoKey) c.logf("magicsock: updated disco key for peer %v to %v", nodeKey.ShortString(), discoKey.ShortString()) metricTSMPDiscoKeyAdvertisementApplied.Add(1)