wgengine/magicsock: shorten process internal DERP queue

DERP writes go via TCP and the host OS will have plenty of buffer space.
We've observed in the wild with a backed up TCP socket kernel side
buffers of >2.4MB. The DERP internal queue being larger causes an
increase in the probability that the contents of the backbuffer are
"dead letters" - packets that were assumed to be lost.

A first step to improvement is to size this queue only large enough to
avoid some of the initial connect stall problem, but not large enough
that it is contributing in a substantial way to buffer bloat /
dead-letter retention.

Updates tailscale/corp#31762

Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
James Tucker 2025-08-28 12:00:03 -07:00 committed by James Tucker
parent d42f0b6a21
commit f5d3c59a92
6 changed files with 15 additions and 69 deletions

View File

@ -958,7 +958,6 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+ tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+ tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+ tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
tailscale.com/util/systemd from tailscale.com/control/controlclient+ tailscale.com/util/systemd from tailscale.com/control/controlclient+
tailscale.com/util/testenv from tailscale.com/control/controlclient+ tailscale.com/util/testenv from tailscale.com/control/controlclient+
tailscale.com/util/truncate from tailscale.com/logtail tailscale.com/util/truncate from tailscale.com/logtail

View File

@ -435,7 +435,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+ tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+ tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+ tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
tailscale.com/util/systemd from tailscale.com/control/controlclient+ tailscale.com/util/systemd from tailscale.com/control/controlclient+
tailscale.com/util/testenv from tailscale.com/ipn/ipnlocal+ tailscale.com/util/testenv from tailscale.com/ipn/ipnlocal+
tailscale.com/util/truncate from tailscale.com/logtail tailscale.com/util/truncate from tailscale.com/logtail

View File

@ -387,7 +387,6 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar
tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+ tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+
tailscale.com/util/syspolicy/setting from tailscale.com/client/local+ tailscale.com/util/syspolicy/setting from tailscale.com/client/local+
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+ tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
tailscale.com/util/systemd from tailscale.com/control/controlclient+ tailscale.com/util/systemd from tailscale.com/control/controlclient+
tailscale.com/util/testenv from tailscale.com/control/controlclient+ tailscale.com/util/testenv from tailscale.com/control/controlclient+
tailscale.com/util/truncate from tailscale.com/logtail tailscale.com/util/truncate from tailscale.com/logtail

View File

@ -382,7 +382,6 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware)
tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+ tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+
tailscale.com/util/syspolicy/setting from tailscale.com/client/local+ tailscale.com/util/syspolicy/setting from tailscale.com/client/local+
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+ tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
tailscale.com/util/systemd from tailscale.com/control/controlclient+ tailscale.com/util/systemd from tailscale.com/control/controlclient+
tailscale.com/util/testenv from tailscale.com/control/controlclient+ tailscale.com/util/testenv from tailscale.com/control/controlclient+
tailscale.com/util/truncate from tailscale.com/logtail tailscale.com/util/truncate from tailscale.com/logtail

View File

@ -11,9 +11,7 @@ import (
"net" "net"
"net/netip" "net/netip"
"reflect" "reflect"
"runtime"
"slices" "slices"
"sync"
"time" "time"
"unsafe" "unsafe"
@ -32,7 +30,6 @@ import (
"tailscale.com/types/logger" "tailscale.com/types/logger"
"tailscale.com/util/mak" "tailscale.com/util/mak"
"tailscale.com/util/rands" "tailscale.com/util/rands"
"tailscale.com/util/sysresources"
"tailscale.com/util/testenv" "tailscale.com/util/testenv"
) )
@ -282,59 +279,20 @@ func (c *Conn) goDerpConnect(regionID int) {
go c.derpWriteChanForRegion(regionID, key.NodePublic{}) go c.derpWriteChanForRegion(regionID, key.NodePublic{})
} }
var ( // derpWriteQueueDepth is the depth of the in-process write queue to a single
bufferedDerpWrites int // DERP region. DERP connections are TCP, and so the actual write queue depth is
bufferedDerpWritesOnce sync.Once // substantially larger than this suggests - often scaling into megabytes
) // depending on dynamic TCP parameters and platform TCP tuning. This queue is
// excess of the TCP buffer depth, which means it's almost pure buffer bloat,
// bufferedDerpWritesBeforeDrop returns how many packets writes can be queued // and does not want to be deep - if there are key situations where a node can't
// up the DERP client to write on the wire before we start dropping. // keep up, either the TCP link to DERP is too slow, or there is a
func bufferedDerpWritesBeforeDrop() int { // synchronization issue in the write path, fixes should be focused on those
// For mobile devices, always return the previous minimum value of 32; // paths, rather than extending this queue.
// we can do this outside the sync.Once to avoid that overhead. // TODO(raggi): make this even shorter, ideally this should be a fairly direct
if runtime.GOOS == "ios" || runtime.GOOS == "android" { // line into a socket TCP buffer. The challenge at present is that connect and
return 32 // reconnect are in the write path and we don't want to block other write
} // operations on those.
const derpWriteQueueDepth = 32
bufferedDerpWritesOnce.Do(func() {
// Some rough sizing: for the previous fixed value of 32, the
// total consumed memory can be:
// = numDerpRegions * messages/region * sizeof(message)
//
// For sake of this calculation, assume 100 DERP regions; at
// time of writing (2023-04-03), we have 24.
//
// A reasonable upper bound for the worst-case average size of
// a message is a *disco.CallMeMaybe message with 16 endpoints;
// since sizeof(netip.AddrPort) = 32, that's 512 bytes. Thus:
// = 100 * 32 * 512
// = 1638400 (1.6MiB)
//
// On a reasonably-small node with 4GiB of memory that's
// connected to each region and handling a lot of load, 1.6MiB
// is about 0.04% of the total system memory.
//
// For sake of this calculation, then, let's double that memory
// usage to 0.08% and scale based on total system memory.
//
// For a 16GiB Linux box, this should buffer just over 256
// messages.
systemMemory := sysresources.TotalMemory()
memoryUsable := float64(systemMemory) * 0.0008
const (
theoreticalDERPRegions = 100
messageMaximumSizeBytes = 512
)
bufferedDerpWrites = int(memoryUsable / (theoreticalDERPRegions * messageMaximumSizeBytes))
// Never drop below the previous minimum value.
if bufferedDerpWrites < 32 {
bufferedDerpWrites = 32
}
})
return bufferedDerpWrites
}
// derpWriteChanForRegion returns a channel to which to send DERP packet write // derpWriteChanForRegion returns a channel to which to send DERP packet write
// requests. It creates a new DERP connection to regionID if necessary. // requests. It creates a new DERP connection to regionID if necessary.
@ -429,7 +387,7 @@ func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<-
dc.DNSCache = dnscache.Get() dc.DNSCache = dnscache.Get()
ctx, cancel := context.WithCancel(c.connCtx) ctx, cancel := context.WithCancel(c.connCtx)
ch := make(chan derpWriteRequest, bufferedDerpWritesBeforeDrop()) ch := make(chan derpWriteRequest, derpWriteQueueDepth)
ad.c = dc ad.c = dc
ad.writeCh = ch ad.writeCh = ch

View File

@ -2137,14 +2137,6 @@ func TestOnNodeViewsUpdateWithNoPeers(t *testing.T) {
} }
} }
func TestBufferedDerpWritesBeforeDrop(t *testing.T) {
vv := bufferedDerpWritesBeforeDrop()
if vv < 32 {
t.Fatalf("got bufferedDerpWritesBeforeDrop=%d, which is < 32", vv)
}
t.Logf("bufferedDerpWritesBeforeDrop = %d", vv)
}
// newWireguard starts up a new wireguard-go device attached to a test tun, and // newWireguard starts up a new wireguard-go device attached to a test tun, and
// returns the device, tun and endpoint port. To add peers call device.IpcSet with UAPI instructions. // returns the device, tun and endpoint port. To add peers call device.IpcSet with UAPI instructions.
func newWireguard(t *testing.T, uapi string, aips []netip.Prefix) (*device.Device, *tuntest.ChannelTUN, uint16) { func newWireguard(t *testing.T, uapi string, aips []netip.Prefix) (*device.Device, *tuntest.ChannelTUN, uint16) {