From 6b729795c30f3f408b3caa983713eec1136d74ff Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Tue, 12 May 2026 20:39:20 +0000 Subject: [PATCH] derp/derpserver: use hashtriemap for peer lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the process-global Server.mu lookup in the packet send hot path with a global hashtriemap mirror of local clientSet entries. The authoritative clients map remains guarded by Server.mu; clientsAtomic is only a lock-free fast path for active local clients. Misses, stale inactive client sets, duplicate accounting, and mesh forwarding still fall back to lookupDestUncached. This avoids taking Server.mu for the common local active-client send path, at the cost of adding one global concurrent map that mirrors Server.clients for local peers. The benchmark uses four destination peers. The before run sets TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE=true to force the old mutex lookup path; the after run uses the hashtrie fast path. goos: linux goarch: amd64 pkg: tailscale.com/derp/derpserver cpu: Intel(R) Xeon(R) 6975P-C │ before │ after │ │ sec/op │ sec/op vs base │ LookupDestHashTrie-16 176.050n ± 1% 1.904n ± 6% -98.92% (p=0.000 n=10) │ before │ after │ │ B/op │ B/op vs base │ LookupDestHashTrie-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal │ before │ after │ │ allocs/op │ allocs/op vs base │ LookupDestHashTrie-16 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal Updates #3560 (very indirectly, historically) Updates #19713 (as an alternative to that PR) Change-Id: Ifb72e5c9854ad00e938cd24c6ab9c27312f297e8 Signed-off-by: Brad Fitzpatrick --- cmd/derper/depaware.txt | 3 +- derp/derpserver/derpserver.go | 63 +++++++++--- derp/derpserver/derpserver_test.go | 155 +++++++++++++++++++++++++++++ flake.nix | 2 +- flakehashes.json | 4 +- go.mod | 1 + go.sum | 2 + shell.nix | 2 +- 8 files changed, 214 insertions(+), 18 deletions(-) diff --git a/cmd/derper/depaware.txt b/cmd/derper/depaware.txt index ec59c7264..c927335e4 100644 --- a/cmd/derper/depaware.txt +++ b/cmd/derper/depaware.txt @@ -20,6 +20,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa github.com/go-json-experiment/json/internal/jsonopts from github.com/go-json-experiment/json+ github.com/go-json-experiment/json/internal/jsonwire from github.com/go-json-experiment/json+ github.com/go-json-experiment/json/jsontext from github.com/go-json-experiment/json+ + 💣 github.com/go4org/hashtriemap from tailscale.com/derp/derpserver github.com/golang/groupcache/lru from tailscale.com/net/dnscache github.com/hdevalence/ed25519consensus from tailscale.com/tka L 💣 github.com/jsimonetti/rtnetlink from tailscale.com/net/netmon @@ -310,7 +311,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa hash from crypto+ hash/crc32 from compress/gzip+ hash/fnv from google.golang.org/protobuf/internal/detrand - hash/maphash from go4.org/mem + hash/maphash from go4.org/mem+ html from net/http/pprof+ html/template from tailscale.com/cmd/derper+ internal/abi from crypto/x509/internal/macos+ diff --git a/derp/derpserver/derpserver.go b/derp/derpserver/derpserver.go index 4e60fff67..e1b45ba3f 100644 --- a/derp/derpserver/derpserver.go +++ b/derp/derpserver/derpserver.go @@ -38,6 +38,7 @@ import ( "time" "github.com/axiomhq/hyperloglog" + "github.com/go4org/hashtriemap" "go4.org/mem" "golang.org/x/sync/errgroup" xrate "golang.org/x/time/rate" @@ -65,6 +66,8 @@ import ( // verbosely log whenever DERP drops a packet. var verboseDropKeys = map[key.NodePublic]bool{} +var debugDisablePeerHashTrie = envknob.RegisterBool("TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE") + // IdealNodeContextKey is the context key used to pass the IdealNodeHeader value // from the HTTP handler to the DERP server's Accept method. var IdealNodeContextKey = ctxkey.New("ideal-node", "") @@ -206,6 +209,11 @@ type Server struct { // maps from netip.AddrPort to a client's public key keyOfAddr map[netip.AddrPort]key.NodePublic rateConfig RateConfig // per-client DERP frame rate limiting config + + // clientsAtomic mirrors clients for local active-client lookup without + // taking Server.mu. The authoritative clients map is still guarded by + // Server.mu; this mirror is only a fast path for handleFrameSendPacket. + clientsAtomic hashtriemap.HashTrieMap[key.NodePublic, *clientSet] } // clientSet represents 1 or more *sclients. @@ -777,6 +785,7 @@ func (s *Server) registerClient(c *sclient) { } cs.activeClient.Store(c) + s.clientsAtomic.Store(c.key, cs) if _, ok := s.clientsMesh[c.key]; !ok { s.clientsMesh[c.key] = nil // just for varz of total users in cluster @@ -832,6 +841,7 @@ func (s *Server) unregisterClient(c *sclient) { c.debugLogf("removed connection") set.activeClient.Store(nil) delete(s.clients, c.key) + s.clientsAtomic.CompareAndDelete(c.key, set) if v, ok := s.clientsMesh[c.key]; ok && v == nil { delete(s.clientsMesh, c.key) s.notePeerGoneFromRegionLocked(c.key) @@ -1260,6 +1270,45 @@ func (c *sclient) handleFrameForwardPacket(_ derp.FrameType, fl uint32) error { }) } +// lookupDest returns the local client, mesh forwarder, or duplicate-client +// count for dst. dstLen is only meaningful when the returned local client is +// nil; when a local client is returned, dstLen is just non-zero. +// +// It first tries clientsAtomic as a lock-free fast path for active local +// clients. Cache misses, inactive clientSets, duplicate-client accounting, and +// mesh forwarder lookups fall back to lookupDestUncached. +func (c *sclient) lookupDest(dst key.NodePublic) (_ *sclient, fwd PacketForwarder, dstLen int) { + if !debugDisablePeerHashTrie() { + if set, ok := c.s.clientsAtomic.Load(dst); ok { + if dst := set.activeClient.Load(); dst != nil { + return dst, nil, 1 + } + } + } + return c.lookupDestUncached(dst) +} + +// lookupDestUncached is the authoritative destination lookup. It takes +// Server.mu to read Server.clients and Server.clientsMesh. At most one local +// client and PacketForwarder can be non-nil: local clients win over mesh +// forwarding, and mesh forwarding is considered only when there is no local +// clientSet. +func (c *sclient) lookupDestUncached(dst key.NodePublic) (_ *sclient, fwd PacketForwarder, dstLen int) { + s := c.s + s.mu.Lock() + defer s.mu.Unlock() + if set, ok := s.clients[dst]; ok { + if dst := set.activeClient.Load(); dst != nil { + return dst, nil, 1 + } + dstLen = set.Len() + } + if dstLen < 1 { + fwd = s.clientsMesh[dst] + } + return nil, fwd, dstLen +} + // handleFrameSendPacket reads a "send packet" frame from the client. func (c *sclient) handleFrameSendPacket(_ derp.FrameType, fl uint32) error { s := c.s @@ -1269,19 +1318,7 @@ func (c *sclient) handleFrameSendPacket(_ derp.FrameType, fl uint32) error { return fmt.Errorf("client %v: recvPacket: %v", c.key, err) } - var fwd PacketForwarder - var dstLen int - var dst *sclient - - s.mu.Lock() - if set, ok := s.clients[dstKey]; ok { - dstLen = set.Len() - dst = set.activeClient.Load() - } - if dst == nil && dstLen < 1 { - fwd = s.clientsMesh[dstKey] - } - s.mu.Unlock() + dst, fwd, dstLen := c.lookupDest(dstKey) if dst == nil { if fwd != nil { diff --git a/derp/derpserver/derpserver_test.go b/derp/derpserver/derpserver_test.go index 7143a9b3d..9e9dc9802 100644 --- a/derp/derpserver/derpserver_test.go +++ b/derp/derpserver/derpserver_test.go @@ -29,6 +29,8 @@ import ( "golang.org/x/time/rate" "tailscale.com/derp" "tailscale.com/derp/derpconst" + "tailscale.com/envknob" + "tailscale.com/tstime" "tailscale.com/types/key" "tailscale.com/types/logger" "tailscale.com/util/set" @@ -1445,6 +1447,159 @@ func TestLoadAndApplyRateConfig(t *testing.T) { }) } +const peerHashTrieDisableEnv = "TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE" + +func setPeerHashTrieDisabled(tb testing.TB, disabled bool) { + tb.Helper() + envknob.Setenv(peerHashTrieDisableEnv, fmt.Sprint(disabled)) + tb.Cleanup(func() { envknob.Setenv(peerHashTrieDisableEnv, "") }) +} + +func TestLookupDestHashTrieFastPath(t *testing.T) { + setPeerHashTrieDisabled(t, false) + + s := &Server{ + clients: map[key.NodePublic]*clientSet{}, + clientsMesh: map[key.NodePublic]PacketForwarder{}, + clock: tstime.StdClock{}, + } + src := pubAll(1) + dst := pubAll(2) + dstClient := &sclient{key: dst} + cs := &clientSet{} + cs.activeClient.Store(dstClient) + s.clients[dst] = cs + s.clientsAtomic.Store(dst, cs) + + c := &sclient{s: s, key: src} + got, fwd, dstLen := c.lookupDest(dst) + if got != dstClient || fwd != nil || dstLen != 1 { + t.Fatalf("lookupDest = (%v, %v, %d), want (%v, nil, 1)", got, fwd, dstLen, dstClient) + } + + // This must not deadlock while s.mu is held; the hashtrie fast path + // should not acquire Server.mu. + s.mu.Lock() + got, _, _ = c.lookupDest(dst) + s.mu.Unlock() + if got != dstClient { + t.Fatalf("lookupDest got %v, want %v", got, dstClient) + } +} + +func TestLookupDestHashTrieFallsBackForForwarder(t *testing.T) { + setPeerHashTrieDisabled(t, false) + + s := &Server{ + clients: map[key.NodePublic]*clientSet{}, + clientsMesh: map[key.NodePublic]PacketForwarder{}, + clock: tstime.StdClock{}, + } + src := pubAll(1) + dst := pubAll(2) + c := &sclient{s: s, key: src} + + s.clientsMesh[dst] = testFwd(1) + got, fwd, dstLen := c.lookupDest(dst) + if got != nil || fwd != testFwd(1) || dstLen != 0 { + t.Fatalf("lookupDest = (%v, %v, %d), want (nil, testFwd(1), 0)", got, fwd, dstLen) + } +} + +func TestLookupDestHashTrieIgnoresInactiveStaleSet(t *testing.T) { + setPeerHashTrieDisabled(t, false) + + s := &Server{ + clients: map[key.NodePublic]*clientSet{}, + clientsMesh: map[key.NodePublic]PacketForwarder{}, + clock: tstime.StdClock{}, + } + src := pubAll(1) + dst := pubAll(2) + c := &sclient{s: s, key: src} + + s.clientsAtomic.Store(dst, &clientSet{}) + + newClient := &sclient{key: dst} + newSet := &clientSet{} + newSet.activeClient.Store(newClient) + s.clients[dst] = newSet + + got, fwd, dstLen := c.lookupDest(dst) + if got != newClient || fwd != nil || dstLen != 1 { + t.Fatalf("lookupDest = (%v, %v, %d), want (%v, nil, 1)", got, fwd, dstLen, newClient) + } +} + +func TestLookupDestHashTrieNoAlloc(t *testing.T) { + setPeerHashTrieDisabled(t, false) + + s := &Server{ + clients: map[key.NodePublic]*clientSet{}, + clientsMesh: map[key.NodePublic]PacketForwarder{}, + clock: tstime.StdClock{}, + } + var dstKeys [4]key.NodePublic + var dstClients [4]*sclient + for i := range dstKeys { + dstKeys[i] = pubAll(byte(i + 2)) + dstClients[i] = &sclient{key: dstKeys[i]} + cs := &clientSet{} + cs.activeClient.Store(dstClients[i]) + s.clients[dstKeys[i]] = cs + s.clientsAtomic.Store(dstKeys[i], cs) + } + c := &sclient{s: s, key: pubAll(1)} + + var i int + var got *sclient + allocs := testing.AllocsPerRun(1000, func() { + idx := i & (len(dstKeys) - 1) + got, _, _ = c.lookupDest(dstKeys[idx]) + i++ + }) + if got == nil { + t.Fatal("lookupDest returned nil") + } + if allocs != 0 { + t.Fatalf("lookupDest allocated %v times per run, want 0", allocs) + } +} + +func BenchmarkLookupDestHashTrie(b *testing.B) { + s := &Server{ + clients: map[key.NodePublic]*clientSet{}, + clientsMesh: map[key.NodePublic]PacketForwarder{}, + clock: tstime.StdClock{}, + } + var dstKeys [4]key.NodePublic + var dstClients [4]*sclient + for i := range dstKeys { + dstKeys[i] = pubAll(byte(i + 2)) + dstClients[i] = &sclient{key: dstKeys[i]} + cs := &clientSet{} + cs.activeClient.Store(dstClients[i]) + s.clients[dstKeys[i]] = cs + s.clientsAtomic.Store(dstKeys[i], cs) + } + + b.ReportAllocs() + b.SetParallelism(32) + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + c := &sclient{s: s, key: pubAll(1)} + var i int + for pb.Next() { + idx := i & (len(dstKeys) - 1) + got, fwd, dstLen := c.lookupDest(dstKeys[idx]) + if got != dstClients[idx] || fwd != nil { + b.Fatalf("lookupDest = (%v, %v, %d), want (%v, nil, _)", got, fwd, dstLen, dstClients[idx]) + } + i++ + } + }) +} + func BenchmarkSenderCardinalityOverhead(b *testing.B) { hll := hyperloglog.New() sender := key.NewNode().Public() diff --git a/flake.nix b/flake.nix index 9dbb5abbe..e2c237f42 100644 --- a/flake.nix +++ b/flake.nix @@ -164,4 +164,4 @@ }); }; } -# nix-direnv cache busting line: sha256-mbxLXR2TBgiwyVGfLmMR5xWk+0f66mPDas95Wla70Lk= +# nix-direnv cache busting line: sha256-Xwm+ZLNqd2k7c2GFQJ2Pf/xuFLMcXhYl5I/YVgS9V4U= diff --git a/flakehashes.json b/flakehashes.json index 9ee6ccb99..b5f6234e2 100644 --- a/flakehashes.json +++ b/flakehashes.json @@ -4,7 +4,7 @@ "sri": "sha256-HeD70CytKL0Ks/VDqMU73bN8fxpWkNc6mNgNr9PEO7k=" }, "vendor": { - "goModSum": "sha256-IbxUmMBapp3G2WIK+gqfmQd1tLCVoHMYBHLPZ5ZjDIU=", - "sri": "sha256-mbxLXR2TBgiwyVGfLmMR5xWk+0f66mPDas95Wla70Lk=" + "goModSum": "sha256-qAO4LAc1PwV43rr/kDsfYwkxeXAelP5DoNSZiCkwcpU=", + "sri": "sha256-Xwm+ZLNqd2k7c2GFQJ2Pf/xuFLMcXhYl5I/YVgS9V4U=" } } diff --git a/go.mod b/go.mod index 11b6605bf..89038db40 100644 --- a/go.mod +++ b/go.mod @@ -41,6 +41,7 @@ require ( github.com/go-json-experiment/json v0.0.0-20250813024750-ebf49471dced github.com/go-logr/zapr v1.3.0 github.com/go-ole/go-ole v1.3.0 + github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689 github.com/go4org/plan9netshell v0.0.0-20250324183649-788daa080737 github.com/godbus/dbus/v5 v5.1.1-0.20230522191255-76236955d466 github.com/gokrazy/breakglass v0.0.0-20251229072214-9dbc0478d486 diff --git a/go.sum b/go.sum index 295ad3aed..b5e8950c3 100644 --- a/go.sum +++ b/go.sum @@ -463,6 +463,8 @@ github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9L github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/go-xmlfmt/xmlfmt v1.1.2 h1:Nea7b4icn8s57fTx1M5AI4qQT5HEM3rVUO8MuE6g80U= github.com/go-xmlfmt/xmlfmt v1.1.2/go.mod h1:aUCEOzzezBEjDBbFBoSiya/gduyIiWYRP6CnSFIV8AM= +github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689 h1:0psnKZ+N2IP43/SZC8SKx6OpFJwLmQb9m9QyV9BC2f8= +github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689/go.mod h1:OGmRfY/9QEK2P5zCRtmqfbCF283xPkU2dvVA4MvbvpI= github.com/go4org/plan9netshell v0.0.0-20250324183649-788daa080737 h1:cf60tHxREO3g1nroKr2osU3JWZsJzkfi7rEg+oAB0Lo= github.com/go4org/plan9netshell v0.0.0-20250324183649-788daa080737/go.mod h1:MIS0jDzbU/vuM9MC4YnBITCv+RYuTRq8dJzmCrFsK9g= github.com/gobuffalo/flect v1.0.3 h1:xeWBM2nui+qnVvNM4S3foBhCAL2XgPU+a7FdpelbTq4= diff --git a/shell.nix b/shell.nix index 648f101a8..bd81d79f0 100644 --- a/shell.nix +++ b/shell.nix @@ -16,4 +16,4 @@ ) { src = ./.; }).shellNix -# nix-direnv cache busting line: sha256-mbxLXR2TBgiwyVGfLmMR5xWk+0f66mPDas95Wla70Lk= +# nix-direnv cache busting line: sha256-Xwm+ZLNqd2k7c2GFQJ2Pf/xuFLMcXhYl5I/YVgS9V4U=