From 75819aeed07dfbae2d64ab0ef66a6e49aed026ef Mon Sep 17 00:00:00 2001
From: Jordan Whited <jordan@tailscale.com>
Date: Mon, 13 Apr 2026 16:31:41 -0700
Subject: [PATCH] derp/derpserver: increase minimum token bucket size

And cap WaitN calls to prevent token bucket errors. Frame length is
inclusive of DERP key for FrameSendPacket frames.

Updates tailscale/corp#40171

Signed-off-by: Jordan Whited <jordan@tailscale.com>
---
 derp/derpserver/derpserver.go      | 38 +++++++++++++++++++++---------
 derp/derpserver/derpserver_test.go | 22 +++++++----------
 2 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/derp/derpserver/derpserver.go b/derp/derpserver/derpserver.go
index 8a393d011..f05f94f09 100644
--- a/derp/derpserver/derpserver.go
+++ b/derp/derpserver/derpserver.go
@@ -514,6 +514,15 @@ func (s *Server) SetTCPWriteTimeout(d time.Duration) {
 	s.tcpWriteTimeout = d
 }
 
+// minRateLimitTokenBucketSize represents the minimum size of a token bucket
+// applied for the purposes of rate limiting a DERP connection per received DERP
+// frame.
+//
+// Note: The DERP protocol supports frames larger than this ([math.MaxUint32] length),
+// but a [derp.FrameSendPacket] cannot exceed this value, which is what we optimize
+// our token bucket calls for.
+const minRateLimitTokenBucketSize = derp.MaxPacketSize + derp.KeyLen
+
 // RateConfig is a JSON-serializable configuration for per-client rate limits.
 // Values are in bytes.
 type RateConfig struct {
@@ -521,8 +530,8 @@ type RateConfig struct {
 	// rate limit in bytes per second. A zero value disables rate-limiting.
 	PerClientRateLimitBytesPerSec uint64 `json:",omitzero"`
 	// PerClientRateBurstBytes represents the per-client token bucket depth,
-	// or burst, in bytes. Any value lower than [derp.MaxPacketSize]
-	// will be increased to [derp.MaxPacketSize] before application.
+	// or burst, in bytes. Any value lower than [minRateLimitTokenBucketSize]
+	// will be increased to [minRateLimitTokenBucketSize] before application.
 	PerClientRateBurstBytes uint64 `json:",omitzero"`
 }
 
@@ -556,15 +565,13 @@ func (s *Server) LoadAndApplyRateConfig(path string) error {
 
 // UpdatePerClientRateLimit sets the per-client receive rate limit in bytes per
 // second and the burst size in bytes, updating all existing client connections.
-// The burst is at least [derp.MaxPacketSize], ensuring at least a full packet
-// can be received in a burst even if the rate limit is low. If bytesPerSec is
-// 0, rate limiting is set to infinity. Mesh peers are always exempt from rate
-// limiting.
+// The applied burst will be at least [minRateLimitTokenBucketSize]. If bytesPerSec is
+// 0, rate limiting is disabled. Mesh peers are always exempt from rate limiting.
 func (s *Server) UpdatePerClientRateLimit(bytesPerSec, burst uint64) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	s.perClientRecvBytesPerSec = bytesPerSec
-	s.perClientRecvBurst = max(burst, derp.MaxPacketSize)
+	s.perClientRecvBurst = max(burst, minRateLimitTokenBucketSize)
 	for _, cs := range s.clients {
 		cs.ForeachClient(func(c *sclient) {
 			c.setRateLimit(s.perClientRecvBytesPerSec, s.perClientRecvBurst)
@@ -1093,10 +1100,9 @@ func (c *sclient) run(ctx context.Context) error {
 			}
 			return fmt.Errorf("client %s: readFrameHeader: %w", c.key.ShortString(), err)
 		}
-		// Rate limit by DERP frame length (fl), which excludes DERP
-		// and TLS protocol overheads.
+		// Rate limit by DERP frame length (fl), which excludes TLS protocol and
+		// DERP frame length field overheads.
 		// Note: meshed clients are exempt from rate limits.
-		// meshed clients are exempt from rate limits
 		if err := c.rateLimit(int(fl)); err != nil {
 			return err // context canceled, connection closing
 		}
@@ -1339,7 +1345,17 @@ func (c *sclient) setRateLimit(bytesPerSec uint64, burst uint64) {
 // and this is a no-op.
 func (c *sclient) rateLimit(n int) error {
 	if lim := c.recvLim.Load(); lim != nil {
-		return lim.WaitN(c.ctx, n)
+		// If n exceeds the capacity of the bucket, then WaitN will return
+		// an error and consume zero tokens. To prevent this, clamp n to
+		// [minRateLimitTokenBucketSize].
+		//
+		// While we could call WaitN multiple times and/or more precisely for
+		// lim.Burst(), it's better to return early as a larger DERP frame is:
+		//   1. unexpected
+		//   2. only partially read off the socket (bufio)
+		//   3. would cause the connection to close shortly after rate limiting, anyway.
+		clampedN := min(n, minRateLimitTokenBucketSize)
+		return lim.WaitN(c.ctx, clampedN)
 	}
 	return nil
 }
diff --git a/derp/derpserver/derpserver_test.go b/derp/derpserver/derpserver_test.go
index b2ecea29f..d7281bee3 100644
--- a/derp/derpserver/derpserver_test.go
+++ b/derp/derpserver/derpserver_test.go
@@ -959,25 +959,21 @@ func BenchmarkHyperLogLogEstimate(b *testing.B) {
 func TestPerClientRateLimit(t *testing.T) {
 	t.Run("throttled", func(t *testing.T) {
 		synctest.Test(t, func(t *testing.T) {
-			// 100 bytes/sec with a burst of 100 bytes.
-			const bytesPerSec = 100
-			const burst = 100
-
 			ctx, cancel := context.WithCancel(context.Background())
 			t.Cleanup(cancel)
 
 			c := &sclient{
 				ctx: ctx,
 			}
-			c.recvLim.Store(rate.NewLimiter(rate.Limit(bytesPerSec), burst))
+			c.recvLim.Store(rate.NewLimiter(rate.Limit(minRateLimitTokenBucketSize), minRateLimitTokenBucketSize))
 
 			// First call within burst should not block.
-			c.rateLimit(burst)
+			c.rateLimit(minRateLimitTokenBucketSize)
 
 			// Next call exceeds burst, should block until tokens replenish.
 			done := make(chan error, 1)
 			go func() {
-				done <- c.rateLimit(burst)
+				done <- c.rateLimit(minRateLimitTokenBucketSize)
 			}()
 
 			// After settling, the goroutine should be blocked (no result yet).
@@ -988,7 +984,7 @@ func TestPerClientRateLimit(t *testing.T) {
 			default:
 			}
 
-			// Advance time by 1 second; 100 bytes/sec * 1s = 100 bytes = burst.
+			// Advance time by 1 second
 			time.Sleep(1 * time.Second)
 			synctest.Wait()
 
@@ -1010,16 +1006,16 @@ func TestPerClientRateLimit(t *testing.T) {
 			c := &sclient{
 				ctx: ctx,
 			}
-			c.recvLim.Store(rate.NewLimiter(rate.Limit(100), 100))
+			c.recvLim.Store(rate.NewLimiter(rate.Limit(minRateLimitTokenBucketSize), minRateLimitTokenBucketSize))
 
 			// Exhaust burst.
-			if err := c.rateLimit(100); err != nil {
+			if err := c.rateLimit(minRateLimitTokenBucketSize); err != nil {
 				t.Fatalf("rateLimit: %v", err)
 			}
 
 			done := make(chan error, 1)
 			go func() {
-				done <- c.rateLimit(100)
+				done <- c.rateLimit(minRateLimitTokenBucketSize)
 			}()
 			synctest.Wait()
 
@@ -1310,8 +1306,8 @@ func TestLoadAndApplyRateConfig(t *testing.T) {
 		s.mu.Lock()
 		gotBurst := s.perClientRecvBurst
 		s.mu.Unlock()
-		if gotBurst != derp.MaxPacketSize {
-			t.Errorf("burst = %d; want at least %d", gotBurst, derp.MaxPacketSize)
+		if gotBurst != minRateLimitTokenBucketSize {
+			t.Errorf("burst = %d; want at least %d", gotBurst, minRateLimitTokenBucketSize)
 		}
 	})