control/controlclient: cache sortedPeers result across calls

sortedPeers() copies the entire peers map into a slice and sorts it on
every call. With large peer sets this is expensive — at 50k peers,
each call costs ~9ms and 402KB for the MapValues copy and sort.

Cache the sorted result in sortedPeersCache, protected by peersMu.
The cache is invalidated (set to nil) at the start of
updatePeersStateFromResponse, which is the only writer of the peers
map. sortedPeers uses a read-lock fast path with double-checked
locking on upgrade to write lock.

At 50,000 peers:
- Cache hit (repeated calls): 17.7ns, 0 allocs (was 9.2ms, 402KB)
- After peer change: 9.2ms (unchanged — must re-sort)

Fixes tailscale/corp#40144

Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
This commit is contained in:
Mike O'Driscoll 2026-04-13 19:01:42 +00:00
parent 674f866ecc
commit dfdf61c383
2 changed files with 56 additions and 4 deletions

View File

@ -107,9 +107,10 @@ type mapSession struct {
changeQueueClosed bool
processQueue sync.WaitGroup
// mu protects the peers map.
peersMu sync.RWMutex
peers map[tailcfg.NodeID]tailcfg.NodeView
// peersMu protects the peers map and sortedPeersCache.
peersMu sync.RWMutex
peers map[tailcfg.NodeID]tailcfg.NodeView
sortedPeersCache []tailcfg.NodeView // lazily computed sorted view of peers; nil means dirty
}
// newMapSession returns a mostly unconfigured new mapSession.
@ -678,6 +679,8 @@ func (ms *mapSession) updatePeersStateFromResponse(resp *tailcfg.MapResponse) (s
ms.peersMu.Lock()
defer ms.peersMu.Unlock()
ms.sortedPeersCache = nil // invalidate; peers are about to change
if ms.peers == nil {
ms.peers = make(map[tailcfg.NodeID]tailcfg.NodeView)
}
@ -1082,12 +1085,24 @@ func (ms *mapSession) PeerIDAndKeyByTailscaleIP(ip netip.Addr) (tailcfg.NodeID,
// sortedPeers returns the session's peers sorted by NodeID.
//
// The result is cached in ms.sortedPeersCache until the next
// updatePeersStateFromResponse call invalidates it, so callers must
// treat the returned slice as read-only.
func (ms *mapSession) sortedPeers() []tailcfg.NodeView {
	// Fast path: serve the cached slice under the read lock only.
	// NOTE: no deferred RUnlock here — both paths below unlock
	// explicitly before taking the write lock, and a deferred RUnlock
	// would double-unlock the RWMutex and panic at runtime.
	ms.peersMu.RLock()
	if c := ms.sortedPeersCache; c != nil {
		ms.peersMu.RUnlock()
		return c
	}
	ms.peersMu.RUnlock()
	// Cache miss; upgrade to write lock and rebuild.
	ms.peersMu.Lock()
	defer ms.peersMu.Unlock()
	// Double-check after acquiring the write lock: another goroutine
	// may have rebuilt the cache between our RUnlock and Lock above.
	if ms.sortedPeersCache != nil {
		return ms.sortedPeersCache
	}
	ret := slicesx.MapValues(ms.peers)
	slices.SortFunc(ret, func(a, b tailcfg.NodeView) int {
		return cmp.Compare(a.ID(), b.ID())
	})
	ms.sortedPeersCache = ret
	return ret
}

View File

@ -1838,3 +1838,40 @@ func TestPeerIDAndKeyByTailscaleIP(t *testing.T) {
}
})
}
// BenchmarkSortedPeers measures sortedPeers in two regimes: repeated
// calls against a warm cache, and calls immediately after a peer
// change has invalidated the cache (forcing a full re-sort).
func BenchmarkSortedPeers(b *testing.B) {
	const peerCount = 50_000
	updater := &countingNetmapUpdater{}
	sess := newTestMapSession(b, updater)
	sess.logf = func(string, ...any) {}
	mr := &tailcfg.MapResponse{
		Node: &tailcfg.Node{ID: 1, Name: "foo.bar.ts.net."},
	}
	for i := 0; i < peerCount; i++ {
		// Each synthetic peer gets a unique /32 in 100.64.0.0/16.
		pfx := netip.MustParsePrefix(fmt.Sprintf("100.64.%d.%d/32", i/256, i%256))
		mr.Peers = append(mr.Peers, &tailcfg.Node{
			ID:         tailcfg.NodeID(i + 2),
			Name:       fmt.Sprintf("peer%d.bar.ts.net.", i),
			Addresses:  []netip.Prefix{pfx},
			AllowedIPs: []netip.Prefix{pfx},
			Hostinfo:   (&tailcfg.Hostinfo{OS: "linux"}).View(),
		})
	}
	sess.HandleNonKeepAliveMapResponse(context.Background(), mr)
	b.Run("repeated", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			sess.sortedPeers()
		}
	})
	b.Run("after_change", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			// Touch one peer so the sorted cache is invalidated
			// before each timed sortedPeers call.
			sess.updatePeersStateFromResponse(&tailcfg.MapResponse{
				OnlineChange: map[tailcfg.NodeID]bool{2: true},
			})
			sess.sortedPeers()
		}
	})
}