mirror of
https://github.com/tailscale/tailscale.git
synced 2025-09-21 13:41:46 +02:00
wgengine/magicsock: shorten process internal DERP queue
DERP writes go via TCP and the host OS will have plenty of buffer space. We've observed in the wild with a backed up TCP socket kernel side buffers of >2.4MB. The DERP internal queue being larger causes an increase in the probability that the contents of the backbuffer are "dead letters" - packets that were assumed to be lost. A first step to improvement is to size this queue only large enough to avoid some of the initial connect stall problem, but not large enough that it is contributing in a substantial way to buffer bloat / dead-letter retention. Updates tailscale/corp#31762 Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
parent
d42f0b6a21
commit
f5d3c59a92
@ -958,7 +958,6 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
|
|||||||
tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+
|
tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+
|
||||||
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+
|
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+
|
||||||
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
||||||
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
|
|
||||||
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
||||||
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
||||||
tailscale.com/util/truncate from tailscale.com/logtail
|
tailscale.com/util/truncate from tailscale.com/logtail
|
||||||
|
@ -435,7 +435,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
|
|||||||
tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+
|
tailscale.com/util/syspolicy/rsop from tailscale.com/util/syspolicy+
|
||||||
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+
|
tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy+
|
||||||
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
||||||
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
|
|
||||||
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
||||||
tailscale.com/util/testenv from tailscale.com/ipn/ipnlocal+
|
tailscale.com/util/testenv from tailscale.com/ipn/ipnlocal+
|
||||||
tailscale.com/util/truncate from tailscale.com/logtail
|
tailscale.com/util/truncate from tailscale.com/logtail
|
||||||
|
@ -387,7 +387,6 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar
|
|||||||
tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+
|
tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+
|
||||||
tailscale.com/util/syspolicy/setting from tailscale.com/client/local+
|
tailscale.com/util/syspolicy/setting from tailscale.com/client/local+
|
||||||
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
||||||
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
|
|
||||||
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
||||||
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
||||||
tailscale.com/util/truncate from tailscale.com/logtail
|
tailscale.com/util/truncate from tailscale.com/logtail
|
||||||
|
@ -382,7 +382,6 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware)
|
|||||||
tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+
|
tailscale.com/util/syspolicy/rsop from tailscale.com/ipn/ipnlocal+
|
||||||
tailscale.com/util/syspolicy/setting from tailscale.com/client/local+
|
tailscale.com/util/syspolicy/setting from tailscale.com/client/local+
|
||||||
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
tailscale.com/util/syspolicy/source from tailscale.com/util/syspolicy+
|
||||||
tailscale.com/util/sysresources from tailscale.com/wgengine/magicsock
|
|
||||||
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
tailscale.com/util/systemd from tailscale.com/control/controlclient+
|
||||||
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
tailscale.com/util/testenv from tailscale.com/control/controlclient+
|
||||||
tailscale.com/util/truncate from tailscale.com/logtail
|
tailscale.com/util/truncate from tailscale.com/logtail
|
||||||
|
@ -11,9 +11,7 @@ import (
|
|||||||
"net"
|
"net"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"reflect"
|
"reflect"
|
||||||
"runtime"
|
|
||||||
"slices"
|
"slices"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
@ -32,7 +30,6 @@ import (
|
|||||||
"tailscale.com/types/logger"
|
"tailscale.com/types/logger"
|
||||||
"tailscale.com/util/mak"
|
"tailscale.com/util/mak"
|
||||||
"tailscale.com/util/rands"
|
"tailscale.com/util/rands"
|
||||||
"tailscale.com/util/sysresources"
|
|
||||||
"tailscale.com/util/testenv"
|
"tailscale.com/util/testenv"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -282,59 +279,20 @@ func (c *Conn) goDerpConnect(regionID int) {
|
|||||||
go c.derpWriteChanForRegion(regionID, key.NodePublic{})
|
go c.derpWriteChanForRegion(regionID, key.NodePublic{})
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
// derpWriteQueueDepth is the depth of the in-process write queue to a single
|
||||||
bufferedDerpWrites int
|
// DERP region. DERP connections are TCP, and so the actual write queue depth is
|
||||||
bufferedDerpWritesOnce sync.Once
|
// substantially larger than this suggests - often scaling into megabytes
|
||||||
)
|
// depending on dynamic TCP parameters and platform TCP tuning. This queue is
|
||||||
|
// excess of the TCP buffer depth, which means it's almost pure buffer bloat,
|
||||||
// bufferedDerpWritesBeforeDrop returns how many packets writes can be queued
|
// and does not want to be deep - if there are key situations where a node can't
|
||||||
// up the DERP client to write on the wire before we start dropping.
|
// keep up, either the TCP link to DERP is too slow, or there is a
|
||||||
func bufferedDerpWritesBeforeDrop() int {
|
// synchronization issue in the write path, fixes should be focused on those
|
||||||
// For mobile devices, always return the previous minimum value of 32;
|
// paths, rather than extending this queue.
|
||||||
// we can do this outside the sync.Once to avoid that overhead.
|
// TODO(raggi): make this even shorter, ideally this should be a fairly direct
|
||||||
if runtime.GOOS == "ios" || runtime.GOOS == "android" {
|
// line into a socket TCP buffer. The challenge at present is that connect and
|
||||||
return 32
|
// reconnect are in the write path and we don't want to block other write
|
||||||
}
|
// operations on those.
|
||||||
|
const derpWriteQueueDepth = 32
|
||||||
bufferedDerpWritesOnce.Do(func() {
|
|
||||||
// Some rough sizing: for the previous fixed value of 32, the
|
|
||||||
// total consumed memory can be:
|
|
||||||
// = numDerpRegions * messages/region * sizeof(message)
|
|
||||||
//
|
|
||||||
// For sake of this calculation, assume 100 DERP regions; at
|
|
||||||
// time of writing (2023-04-03), we have 24.
|
|
||||||
//
|
|
||||||
// A reasonable upper bound for the worst-case average size of
|
|
||||||
// a message is a *disco.CallMeMaybe message with 16 endpoints;
|
|
||||||
// since sizeof(netip.AddrPort) = 32, that's 512 bytes. Thus:
|
|
||||||
// = 100 * 32 * 512
|
|
||||||
// = 1638400 (1.6MiB)
|
|
||||||
//
|
|
||||||
// On a reasonably-small node with 4GiB of memory that's
|
|
||||||
// connected to each region and handling a lot of load, 1.6MiB
|
|
||||||
// is about 0.04% of the total system memory.
|
|
||||||
//
|
|
||||||
// For sake of this calculation, then, let's double that memory
|
|
||||||
// usage to 0.08% and scale based on total system memory.
|
|
||||||
//
|
|
||||||
// For a 16GiB Linux box, this should buffer just over 256
|
|
||||||
// messages.
|
|
||||||
systemMemory := sysresources.TotalMemory()
|
|
||||||
memoryUsable := float64(systemMemory) * 0.0008
|
|
||||||
|
|
||||||
const (
|
|
||||||
theoreticalDERPRegions = 100
|
|
||||||
messageMaximumSizeBytes = 512
|
|
||||||
)
|
|
||||||
bufferedDerpWrites = int(memoryUsable / (theoreticalDERPRegions * messageMaximumSizeBytes))
|
|
||||||
|
|
||||||
// Never drop below the previous minimum value.
|
|
||||||
if bufferedDerpWrites < 32 {
|
|
||||||
bufferedDerpWrites = 32
|
|
||||||
}
|
|
||||||
})
|
|
||||||
return bufferedDerpWrites
|
|
||||||
}
|
|
||||||
|
|
||||||
// derpWriteChanForRegion returns a channel to which to send DERP packet write
|
// derpWriteChanForRegion returns a channel to which to send DERP packet write
|
||||||
// requests. It creates a new DERP connection to regionID if necessary.
|
// requests. It creates a new DERP connection to regionID if necessary.
|
||||||
@ -429,7 +387,7 @@ func (c *Conn) derpWriteChanForRegion(regionID int, peer key.NodePublic) chan<-
|
|||||||
dc.DNSCache = dnscache.Get()
|
dc.DNSCache = dnscache.Get()
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(c.connCtx)
|
ctx, cancel := context.WithCancel(c.connCtx)
|
||||||
ch := make(chan derpWriteRequest, bufferedDerpWritesBeforeDrop())
|
ch := make(chan derpWriteRequest, derpWriteQueueDepth)
|
||||||
|
|
||||||
ad.c = dc
|
ad.c = dc
|
||||||
ad.writeCh = ch
|
ad.writeCh = ch
|
||||||
|
@ -2137,14 +2137,6 @@ func TestOnNodeViewsUpdateWithNoPeers(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBufferedDerpWritesBeforeDrop(t *testing.T) {
|
|
||||||
vv := bufferedDerpWritesBeforeDrop()
|
|
||||||
if vv < 32 {
|
|
||||||
t.Fatalf("got bufferedDerpWritesBeforeDrop=%d, which is < 32", vv)
|
|
||||||
}
|
|
||||||
t.Logf("bufferedDerpWritesBeforeDrop = %d", vv)
|
|
||||||
}
|
|
||||||
|
|
||||||
// newWireguard starts up a new wireguard-go device attached to a test tun, and
|
// newWireguard starts up a new wireguard-go device attached to a test tun, and
|
||||||
// returns the device, tun and endpoint port. To add peers call device.IpcSet with UAPI instructions.
|
// returns the device, tun and endpoint port. To add peers call device.IpcSet with UAPI instructions.
|
||||||
func newWireguard(t *testing.T, uapi string, aips []netip.Prefix) (*device.Device, *tuntest.ChannelTUN, uint16) {
|
func newWireguard(t *testing.T, uapi string, aips []netip.Prefix) (*device.Device, *tuntest.ChannelTUN, uint16) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user