mirror of
https://github.com/tailscale/tailscale.git
synced 2025-09-21 13:41:46 +02:00
wgengine/magicsock: shorten process internal DERP queue
DERP writes go via TCP and the host OS will have plenty of buffer space. We've observed in the wild with a backed up TCP socket kernel side buffers of >2.4MB. The DERP internal queue being larger causes an increase in the probability that the contents of the backbuffer are "dead letters" - packets that were assumed to be lost. A first step to improvement is to size this queue only large enough to avoid some of the initial connect stall problem, but not large enough that it is contributing in a substantial way to buffer bloat / dead-letter retention. Updates tailscale/corp#31762 Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
parent
d42f0b6a21
commit
f5d3c59a92
@ -958,7 +958,6 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
|
||||
tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+
|
||||
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+
|
||||
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
||||
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
|
||||
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/truncate from tailscale.com/logtail
|
||||
|
@ -435,7 +435,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
|
||||
tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+
|
||||
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+
|
||||
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
||||
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
|
||||
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/testenv from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/truncate from tailscale.com/logtail
|
||||
|
@ -387,7 +387,6 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar
|
||||
tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/syspolicy/setting from tailscale.com/client/local+
|
||||
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
||||
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
|
||||
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/truncate from tailscale.com/logtail
|
||||
|
@ -382,7 +382,6 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware)
|
||||
tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/syspolicy/setting from tailscale.com/client/local+
|
||||
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
||||
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
|
||||
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/truncate from tailscale.com/logtail
|
||||
|
@ -11,9 +11,7 @@ import (
|
||||
"net"
|
||||
"net/netip"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"slices"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
@ -32,7 +30,6 @@ import (
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/mak"
|
||||
"tailscale.com/util/rands"
|
||||
"tailscale.com/util/sysresources"
|
||||
"tailscale.com/util/testenv"
|
||||
)
|
||||
|
||||
@ -282,59 +279,20 @@ func (c *Conn) goDerpConnect(regionID int) {
|
||||
go c.derpWriteChanForRegion(regionID, key.NodePublic{})
|
||||
}
|
||||
|
||||
var (
|
||||
bufferedDerpWrites int
|
||||
bufferedDerpWritesOnce sync.Once
|
||||
)
|
||||
|
||||
// bufferedDerpWritesBeforeDrop returns how many packets writes can be queued
|
||||
// up the DERP client to write on the wire before we start dropping.
|
||||
func bufferedDerpWritesBeforeDrop() int {
|
||||
// For mobile devices, always return the previous minimum value of 32;
|
||||
// we can do this outside the sync.Once to avoid that overhead.
|
||||
if runtime.GOOS == "ios" || runtime.GOOS == "android" {
|
||||
return 32
|
||||
}
|
||||
|
||||
bufferedDerpWritesOnce.Do(func() {
|
||||
// Some rough sizing: for the previous fixed value of 32, the
|
||||
// total consumed memory can be:
|
||||
// = numDerpRegions * messages/region * sizeof(message)
|
||||
//
|
||||
// For sake of this calculation, assume 100 DERP regions; at
|
||||
// time of writing (2023-04-03), we have 24.
|
||||
//
|
||||
// A reasonable upper bound for the worst-case average size of
|
||||
// a message is a *disco.CallMeMaybe message with 16 endpoints;
|
||||
// since sizeof(netip.AddrPort) = 32, that's 512 bytes. Thus:
|
||||
// = 100 * 32 * 512
|
||||
// = 1638400 (1.6MiB)
|
||||
//
|
||||
// On a reasonably-small node with 4GiB of memory that's
|
||||
// connected to each region and handling a lot of load, 1.6MiB
|
||||
// is about 0.04% of the total system memory.
|
||||
//
|
||||
// For sake of this calculation, then, let's double that memory
|
||||
// usage to 0.08% and scale based on total system memory.
|
||||
//
|
||||
// For a 16GiB Linux box, this should buffer just over 256
|
||||
// messages.
|
||||
systemMemory := sysresources.TotalMemory()
|
||||
memoryUsable := float64(systemMemory) * 0.0008
|
||||
|
||||
const (
|
||||
theoreticalDERPRegions = 100
|
||||
messageMaximumSizeBytes = 512
|
||||
)
|
||||
bufferedDerpWrites = int(memoryUsable / (theoreticalDERPRegions * messageMaximumSizeBytes))
|
||||
|
||||
// Never drop below the previous minimum value.
|
||||
if bufferedDerpWrites < 32 {
|
||||
bufferedDerpWrites = 32
|
||||
}
|
||||
})
|
||||
return bufferedDerpWrites
|
||||
}
|
||||
// derpWriteQueueDepth is the capacity of the in-process write queue feeding a
// single DERP region. DERP connections ride on TCP, so the kernel socket
// buffers already hold far more queued data than this — often megabytes,
// depending on dynamic TCP parameters and platform TCP tuning. Anything
// queued here sits on top of the TCP buffer depth and is therefore almost
// pure buffer bloat; it should stay shallow. If there are key situations
// where a node can't keep up, either the TCP link to DERP is too slow or
// there is a synchronization issue in the write path — fixes belong in those
// paths, not in a deeper queue.
//
// TODO(raggi): make this even shorter; ideally writes should flow almost
// directly into a socket TCP buffer. The present obstacle is that connect
// and reconnect happen on the write path, and other write operations must
// not block on them.
const derpWriteQueueDepth = 32
|
||||
|
||||
// derpWriteChanForRegion returns a channel to which to send DERP packet write
|
||||
// requests. It creates a new DERP connection to regionID if necessary.
|
||||
@ -429,7 +387,7 @@ func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<-
|
||||
dc.DNSCache = dnscache.Get()
|
||||
|
||||
ctx, cancel := context.WithCancel(c.connCtx)
|
||||
ch := make(chan derpWriteRequest, bufferedDerpWritesBeforeDrop())
|
||||
ch := make(chan derpWriteRequest, derpWriteQueueDepth)
|
||||
|
||||
ad.c = dc
|
||||
ad.writeCh = ch
|
||||
|
@ -2137,14 +2137,6 @@ func TestOnNodeViewsUpdateWithNoPeers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBufferedDerpWritesBeforeDrop(t *testing.T) {
|
||||
vv := bufferedDerpWritesBeforeDrop()
|
||||
if vv < 32 {
|
||||
t.Fatalf("got bufferedDerpWritesBeforeDrop=%d, which is < 32", vv)
|
||||
}
|
||||
t.Logf("bufferedDerpWritesBeforeDrop = %d", vv)
|
||||
}
|
||||
|
||||
// newWireguard starts up a new wireguard-go device attached to a test tun, and
|
||||
// returns the device, tun and endpoint port. To add peers call device.IpcSet with UAPI instructions.
|
||||
func newWireguard(t *testing.T, uapi string, aips []netip.Prefix) (*device.Device, *tuntest.ChannelTUN, uint16) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user