derp/derpserver: use hashtriemap for peer lookup

Replace the process-global Server.mu lookup in the packet send hot path
with a global hashtriemap mirror of local clientSet entries. The
authoritative clients map remains guarded by Server.mu; clientsAtomic is
only a lock-free fast path for active local clients.

Misses, stale inactive client sets, duplicate accounting, and mesh
forwarding still fall back to lookupDestUncached. This avoids taking
Server.mu for the common local active-client send path, at the cost of
adding one global concurrent map that mirrors Server.clients for local
peers.

The benchmark uses four destination peers. The before run sets
TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE=true to force the old mutex lookup
path; the after run uses the hashtrie fast path.

    goos: linux
    goarch: amd64
    pkg: tailscale.com/derp/derpserver
    cpu: Intel(R) Xeon(R) 6975P-C
                          │    before     │                after                │
                          │    sec/op     │   sec/op     vs base                │
    LookupDestHashTrie-16   176.050n ± 1%   1.904n ± 6%  -98.92% (p=0.000 n=10)

                          │   before   │             after              │
                          │    B/op    │    B/op     vs base            │
    LookupDestHashTrie-16   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=10) ¹
    ¹ all samples are equal

                          │   before   │             after              │
                          │ allocs/op  │ allocs/op   vs base            │
    LookupDestHashTrie-16   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=10) ¹
    ¹ all samples are equal

Updates #3560 (very indirectly, historically)
Updates #19713 (as an alternative to that PR)

Change-Id: Ifb72e5c9854ad00e938cd24c6ab9c27312f297e8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2026-05-12 20:39:20 +00:00 committed by Brad Fitzpatrick
parent 72578de033
commit 6b729795c3
8 changed files with 214 additions and 18 deletions

View File

@ -20,6 +20,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
github.com/go-json-experiment/json/internal/jsonopts from github.com/go-json-experiment/json+
github.com/go-json-experiment/json/internal/jsonwire from github.com/go-json-experiment/json+
github.com/go-json-experiment/json/jsontext from github.com/go-json-experiment/json+
💣 github.com/go4org/hashtriemap from tailscale.com/derp/derpserver
github.com/golang/groupcache/lru from tailscale.com/net/dnscache
github.com/hdevalence/ed25519consensus from tailscale.com/tka
L 💣 github.com/jsimonetti/rtnetlink from tailscale.com/net/netmon
@ -310,7 +311,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
hash from crypto+
hash/crc32 from compress/gzip+
hash/fnv from google.golang.org/protobuf/internal/detrand
hash/maphash from go4.org/mem
hash/maphash from go4.org/mem+
html from net/http/pprof+
html/template from tailscale.com/cmd/derper+
internal/abi from crypto/x509/internal/macos+

View File

@ -38,6 +38,7 @@ import (
"time"
"github.com/axiomhq/hyperloglog"
"github.com/go4org/hashtriemap"
"go4.org/mem"
"golang.org/x/sync/errgroup"
xrate "golang.org/x/time/rate"
@ -65,6 +66,8 @@ import (
// verbosely log whenever DERP drops a packet.
var verboseDropKeys = map[key.NodePublic]bool{}
var debugDisablePeerHashTrie = envknob.RegisterBool("TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE")
// IdealNodeContextKey is the context key used to pass the IdealNodeHeader value
// from the HTTP handler to the DERP server's Accept method.
var IdealNodeContextKey = ctxkey.New("ideal-node", "")
@ -206,6 +209,11 @@ type Server struct {
// maps from netip.AddrPort to a client's public key
keyOfAddr map[netip.AddrPort]key.NodePublic
rateConfig RateConfig // per-client DERP frame rate limiting config
// clientsAtomic mirrors clients for local active-client lookup without
// taking Server.mu. The authoritative clients map is still guarded by
// Server.mu; this mirror is only a fast path for handleFrameSendPacket.
clientsAtomic hashtriemap.HashTrieMap[key.NodePublic, *clientSet]
}
// clientSet represents 1 or more *sclients.
@ -777,6 +785,7 @@ func (s *Server) registerClient(c *sclient) {
}
cs.activeClient.Store(c)
s.clientsAtomic.Store(c.key, cs)
if _, ok := s.clientsMesh[c.key]; !ok {
s.clientsMesh[c.key] = nil // just for varz of total users in cluster
@ -832,6 +841,7 @@ func (s *Server) unregisterClient(c *sclient) {
c.debugLogf("removed connection")
set.activeClient.Store(nil)
delete(s.clients, c.key)
s.clientsAtomic.CompareAndDelete(c.key, set)
if v, ok := s.clientsMesh[c.key]; ok && v == nil {
delete(s.clientsMesh, c.key)
s.notePeerGoneFromRegionLocked(c.key)
@ -1260,6 +1270,45 @@ func (c *sclient) handleFrameForwardPacket(_ derp.FrameType, fl uint32) error {
})
}
// lookupDest resolves dst to either a local client or a mesh PacketForwarder,
// along with a duplicate-client count. dstLen only carries meaning when the
// returned local client is nil; for a non-nil local client it is simply some
// non-zero value.
//
// The lock-free clientsAtomic mirror is consulted first for active local
// clients; anything else (miss, inactive set, duplicate accounting, mesh
// forwarding) is answered authoritatively by lookupDestUncached.
func (c *sclient) lookupDest(dst key.NodePublic) (_ *sclient, fwd PacketForwarder, dstLen int) {
	if debugDisablePeerHashTrie() {
		// Debug knob: force the old mutex-guarded lookup path.
		return c.lookupDestUncached(dst)
	}
	set, ok := c.s.clientsAtomic.Load(dst)
	if !ok {
		return c.lookupDestUncached(dst)
	}
	active := set.activeClient.Load()
	if active == nil {
		// Stale or inactive mirror entry; let the authoritative path decide.
		return c.lookupDestUncached(dst)
	}
	return active, nil, 1
}
// lookupDestUncached is the authoritative destination lookup. It acquires
// Server.mu to consult Server.clients and Server.clientsMesh. At most one of
// the returned local client and PacketForwarder is non-nil: a local client
// takes precedence, and the mesh forwarder is considered only when no local
// clientSet exists for dst.
func (c *sclient) lookupDestUncached(dst key.NodePublic) (_ *sclient, fwd PacketForwarder, dstLen int) {
	s := c.s
	s.mu.Lock()
	defer s.mu.Unlock()

	if set, ok := s.clients[dst]; ok {
		if active := set.activeClient.Load(); active != nil {
			return active, nil, 1
		}
		dstLen = set.Len()
	}
	if dstLen < 1 {
		// No local clientSet at all: mesh forwarding may apply.
		fwd = s.clientsMesh[dst]
	}
	return nil, fwd, dstLen
}
// handleFrameSendPacket reads a "send packet" frame from the client.
func (c *sclient) handleFrameSendPacket(_ derp.FrameType, fl uint32) error {
s := c.s
@ -1269,19 +1318,7 @@ func (c *sclient) handleFrameSendPacket(_ derp.FrameType, fl uint32) error {
return fmt.Errorf("client %v: recvPacket: %v", c.key, err)
}
var fwd PacketForwarder
var dstLen int
var dst *sclient
s.mu.Lock()
if set, ok := s.clients[dstKey]; ok {
dstLen = set.Len()
dst = set.activeClient.Load()
}
if dst == nil && dstLen < 1 {
fwd = s.clientsMesh[dstKey]
}
s.mu.Unlock()
dst, fwd, dstLen := c.lookupDest(dstKey)
if dst == nil {
if fwd != nil {

View File

@ -29,6 +29,8 @@ import (
"golang.org/x/time/rate"
"tailscale.com/derp"
"tailscale.com/derp/derpconst"
"tailscale.com/envknob"
"tailscale.com/tstime"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/util/set"
@ -1445,6 +1447,159 @@ func TestLoadAndApplyRateConfig(t *testing.T) {
})
}
// peerHashTrieDisableEnv is the envknob that, when true, forces the old
// mutex-guarded destination lookup instead of the hashtrie fast path.
const peerHashTrieDisableEnv = "TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE"

// setPeerHashTrieDisabled toggles the hashtrie fast path for the duration of
// the test and clears the envknob again at cleanup.
func setPeerHashTrieDisabled(tb testing.TB, disabled bool) {
	tb.Helper()
	tb.Cleanup(func() { envknob.Setenv(peerHashTrieDisableEnv, "") })
	envknob.Setenv(peerHashTrieDisableEnv, fmt.Sprint(disabled))
}
// TestLookupDestHashTrieFastPath verifies that an active local client present
// in the clientsAtomic mirror is returned without taking Server.mu.
func TestLookupDestHashTrieFastPath(t *testing.T) {
	setPeerHashTrieDisabled(t, false)

	srv := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}
	srcKey, dstKey := pubAll(1), pubAll(2)
	want := &sclient{key: dstKey}
	set := &clientSet{}
	set.activeClient.Store(want)
	srv.clients[dstKey] = set
	srv.clientsAtomic.Store(dstKey, set)

	sender := &sclient{s: srv, key: srcKey}
	got, fwd, dstLen := sender.lookupDest(dstKey)
	if got != want || fwd != nil || dstLen != 1 {
		t.Fatalf("lookupDest = (%v, %v, %d), want (%v, nil, 1)", got, fwd, dstLen, want)
	}

	// The fast path must not acquire Server.mu, so calling it while we hold
	// the mutex must not deadlock.
	srv.mu.Lock()
	got, _, _ = sender.lookupDest(dstKey)
	srv.mu.Unlock()
	if got != want {
		t.Fatalf("lookupDest got %v, want %v", got, want)
	}
}
// TestLookupDestHashTrieFallsBackForForwarder verifies that a hashtrie miss
// falls back to the authoritative path and finds the mesh forwarder.
func TestLookupDestHashTrieFallsBackForForwarder(t *testing.T) {
	setPeerHashTrieDisabled(t, false)

	srv := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}
	srcKey, dstKey := pubAll(1), pubAll(2)
	srv.clientsMesh[dstKey] = testFwd(1)

	sender := &sclient{s: srv, key: srcKey}
	got, fwd, dstLen := sender.lookupDest(dstKey)
	if got != nil || fwd != testFwd(1) || dstLen != 0 {
		t.Fatalf("lookupDest = (%v, %v, %d), want (nil, testFwd(1), 0)", got, fwd, dstLen)
	}
}
// TestLookupDestHashTrieIgnoresInactiveStaleSet verifies that a stale,
// inactive clientSet in the hashtrie mirror is skipped in favor of the
// authoritative Server.clients entry.
func TestLookupDestHashTrieIgnoresInactiveStaleSet(t *testing.T) {
	setPeerHashTrieDisabled(t, false)

	srv := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}
	srcKey, dstKey := pubAll(1), pubAll(2)
	sender := &sclient{s: srv, key: srcKey}

	// Stale mirror entry with no active client.
	srv.clientsAtomic.Store(dstKey, &clientSet{})

	// Authoritative map has the real, active client.
	want := &sclient{key: dstKey}
	freshSet := &clientSet{}
	freshSet.activeClient.Store(want)
	srv.clients[dstKey] = freshSet

	got, fwd, dstLen := sender.lookupDest(dstKey)
	if got != want || fwd != nil || dstLen != 1 {
		t.Fatalf("lookupDest = (%v, %v, %d), want (%v, nil, 1)", got, fwd, dstLen, want)
	}
}
// TestLookupDestHashTrieNoAlloc verifies that the hashtrie fast path performs
// zero heap allocations per lookup.
func TestLookupDestHashTrieNoAlloc(t *testing.T) {
	setPeerHashTrieDisabled(t, false)

	srv := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}
	var keys [4]key.NodePublic
	var peers [4]*sclient
	for i := range keys {
		keys[i] = pubAll(byte(i + 2))
		peers[i] = &sclient{key: keys[i]}
		set := &clientSet{}
		set.activeClient.Store(peers[i])
		srv.clients[keys[i]] = set
		srv.clientsAtomic.Store(keys[i], set)
	}

	sender := &sclient{s: srv, key: pubAll(1)}
	var (
		n   int
		got *sclient
	)
	allocs := testing.AllocsPerRun(1000, func() {
		// len(keys) is a power of two, so masking cycles through all peers.
		got, _, _ = sender.lookupDest(keys[n&(len(keys)-1)])
		n++
	})
	if got == nil {
		t.Fatal("lookupDest returned nil")
	}
	if allocs != 0 {
		t.Fatalf("lookupDest allocated %v times per run, want 0", allocs)
	}
}
// BenchmarkLookupDestHashTrie measures destination lookup with the hashtrie
// fast path; set TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE=true to benchmark the
// old mutex-guarded path instead. Four destination peers are cycled through
// by each worker goroutine.
func BenchmarkLookupDestHashTrie(b *testing.B) {
	s := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}
	var dstKeys [4]key.NodePublic
	var dstClients [4]*sclient
	for i := range dstKeys {
		dstKeys[i] = pubAll(byte(i + 2))
		dstClients[i] = &sclient{key: dstKeys[i]}
		cs := &clientSet{}
		cs.activeClient.Store(dstClients[i])
		s.clients[dstKeys[i]] = cs
		s.clientsAtomic.Store(dstKeys[i], cs)
	}
	b.ReportAllocs()
	b.SetParallelism(32)
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		c := &sclient{s: s, key: pubAll(1)}
		var i int
		for pb.Next() {
			// len(dstKeys) is a power of two, so masking cycles all peers.
			idx := i & (len(dstKeys) - 1)
			got, fwd, dstLen := c.lookupDest(dstKeys[idx])
			if got != dstClients[idx] || fwd != nil {
				// Fatal/FailNow must only be called from the goroutine
				// running the benchmark function, not RunParallel workers;
				// report the failure and stop this worker instead.
				b.Errorf("lookupDest = (%v, %v, %d), want (%v, nil, _)", got, fwd, dstLen, dstClients[idx])
				return
			}
			i++
		}
	})
}
func BenchmarkSenderCardinalityOverhead(b *testing.B) {
hll := hyperloglog.New()
sender := key.NewNode().Public()

View File

@ -164,4 +164,4 @@
});
};
}
# nix-direnv cache busting line: sha256-mbxLXR2TBgiwyVGfLmMR5xWk+0f66mPDas95Wla70Lk=
# nix-direnv cache busting line: sha256-Xwm+ZLNqd2k7c2GFQJ2Pf/xuFLMcXhYl5I/YVgS9V4U=

View File

@ -4,7 +4,7 @@
"sri": "sha256-HeD70CytKL0Ks/VDqMU73bN8fxpWkNc6mNgNr9PEO7k="
},
"vendor": {
"goModSum": "sha256-IbxUmMBapp3G2WIK+gqfmQd1tLCVoHMYBHLPZ5ZjDIU=",
"sri": "sha256-mbxLXR2TBgiwyVGfLmMR5xWk+0f66mPDas95Wla70Lk="
"goModSum": "sha256-qAO4LAc1PwV43rr/kDsfYwkxeXAelP5DoNSZiCkwcpU=",
"sri": "sha256-Xwm+ZLNqd2k7c2GFQJ2Pf/xuFLMcXhYl5I/YVgS9V4U="
}
}

1
go.mod
View File

@ -41,6 +41,7 @@ require (
github.com/go-json-experiment/json v0.0.0-20250813024750-ebf49471dced
github.com/go-logr/zapr v1.3.0
github.com/go-ole/go-ole v1.3.0
github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689
github.com/go4org/plan9netshell v0.0.0-20250324183649-788daa080737
github.com/godbus/dbus/v5 v5.1.1-0.20230522191255-76236955d466
github.com/gokrazy/breakglass v0.0.0-20251229072214-9dbc0478d486

2
go.sum
View File

@ -463,6 +463,8 @@ github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9L
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/go-xmlfmt/xmlfmt v1.1.2 h1:Nea7b4icn8s57fTx1M5AI4qQT5HEM3rVUO8MuE6g80U=
github.com/go-xmlfmt/xmlfmt v1.1.2/go.mod h1:aUCEOzzezBEjDBbFBoSiya/gduyIiWYRP6CnSFIV8AM=
github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689 h1:0psnKZ+N2IP43/SZC8SKx6OpFJwLmQb9m9QyV9BC2f8=
github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689/go.mod h1:OGmRfY/9QEK2P5zCRtmqfbCF283xPkU2dvVA4MvbvpI=
github.com/go4org/plan9netshell v0.0.0-20250324183649-788daa080737 h1:cf60tHxREO3g1nroKr2osU3JWZsJzkfi7rEg+oAB0Lo=
github.com/go4org/plan9netshell v0.0.0-20250324183649-788daa080737/go.mod h1:MIS0jDzbU/vuM9MC4YnBITCv+RYuTRq8dJzmCrFsK9g=
github.com/gobuffalo/flect v1.0.3 h1:xeWBM2nui+qnVvNM4S3foBhCAL2XgPU+a7FdpelbTq4=

View File

@ -16,4 +16,4 @@
) {
src = ./.;
}).shellNix
# nix-direnv cache busting line: sha256-mbxLXR2TBgiwyVGfLmMR5xWk+0f66mPDas95Wla70Lk=
# nix-direnv cache busting line: sha256-Xwm+ZLNqd2k7c2GFQJ2Pf/xuFLMcXhYl5I/YVgS9V4U=