tailscale/wgengine/userspace_test.go
Fernando Serboncini 5834058269
wgengine: replace reflect.DeepEqual with typed Equal for maybeReconfigInputs (#19365)
reflect.DeepEqual is expensive and allocates heavily. Replace it with
a field-by-field comparison that does zero allocations.

Adds tests and benchmarks for the new Equal method.

Fixes #19363

Signed-off-by: Fernando Serboncini <fserb@tailscale.com>
2026-04-14 13:16:21 -04:00

728 lines
18 KiB
Go

// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package wgengine
import (
"fmt"
"math/rand"
"net/netip"
"os"
"reflect"
"runtime"
"testing"
"go4.org/mem"
"tailscale.com/cmd/testwrapper/flakytest"
"tailscale.com/control/controlknobs"
"tailscale.com/envknob"
"tailscale.com/health"
"tailscale.com/net/dns"
"tailscale.com/net/netaddr"
"tailscale.com/net/tstun"
"tailscale.com/tailcfg"
"tailscale.com/tstest"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
"tailscale.com/types/netmap"
"tailscale.com/types/opt"
"tailscale.com/types/views"
"tailscale.com/util/eventbus/eventbustest"
"tailscale.com/util/usermetric"
"tailscale.com/wgengine/router"
"tailscale.com/wgengine/wgcfg"
)
// TestNoteReceiveActivity exercises noteRecvActivity on a hand-built
// userspaceEngine for three kinds of peer key: untracked, tracked but not
// trimmed, and tracked and trimmed.
func TestNoteReceiveActivity(t *testing.T) {
	fixedNow := mono.Time(123456)
	var logs tstest.MemLogger

	// reconfigs receives a value each time the engine calls its
	// testMaybeReconfigHook.
	reconfigs := make(chan bool, 1)
	eng := &userspaceEngine{
		timeNow:               func() mono.Time { return fixedNow },
		recvActivityAt:        map[key.NodePublic]mono.Time{},
		logf:                  logs.Logf,
		tundev:                new(tstun.Wrapper),
		testMaybeReconfigHook: func() { reconfigs <- true },
		trimmedNodes:          map[key.NodePublic]bool{},
	}
	activity := eng.recvActivityAt
	peer := key.NewNode().Public()

	// sawReconfig reports, without blocking, whether the hook has fired
	// since the previous call.
	sawReconfig := func() bool {
		select {
		case <-reconfigs:
			return true
		default:
		}
		return false
	}

	// wantOneEntryAtNow fails the test unless the activity map holds exactly
	// one key and the entry for peer equals the fake clock.
	wantOneEntryAtNow := func() {
		t.Helper()
		if len(activity) != 1 {
			t.Fatalf("unexpected growth in map: now has %d keys; want 1", len(activity))
		}
		if got := activity[peer]; got != fixedNow {
			t.Fatalf("time in map = %v; want %v", got, fixedNow)
		}
	}

	// Activity on an untracked key should do nothing: no map entry and no
	// log output.
	eng.noteRecvActivity(peer)
	if len(activity) != 0 {
		t.Fatalf("unexpected growth in map: now has %d keys; want 0", len(activity))
	}
	if logs.Len() != 0 {
		t.Fatalf("unexpected log write (and thus activity): %s", logs.Bytes())
	}

	// A tracked key that is not trimmed gets its timestamp recorded, but
	// must not cause a reconfig.
	activity[peer] = 0
	eng.noteRecvActivity(peer)
	wantOneEntryAtNow()
	if sawReconfig() {
		t.Fatalf("unexpected reconfig")
	}

	// Once the key is marked trimmed, activity must also cause a reconfig.
	eng.trimmedNodes[peer] = true
	eng.noteRecvActivity(peer)
	wantOneEntryAtNow()
	if !sawReconfig() {
		t.Fatalf("didn't get expected reconfig")
	}
}
// nodeViews returns a slice holding the View() of every node in v, in the
// same order.
func nodeViews(v []*tailcfg.Node) []tailcfg.NodeView {
	out := make([]tailcfg.NodeView, 0, len(v))
	for _, node := range v {
		out = append(out, node.View())
	}
	return out
}
// TestUserspaceEngineReconfig reconfigures a fake engine twice, each time with
// a single peer under a different node key, and checks that after every
// Reconfig the engine's recvActivityAt and trimmedNodes maps contain exactly
// that peer.
func TestUserspaceEngineReconfig(t *testing.T) {
	bus := eventbustest.NewBus(t)
	tracker := health.NewTracker(bus)
	metrics := new(usermetric.Registry)
	e, err := NewFakeUserspaceEngine(t.Logf, 0, tracker, metrics, bus)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(e.Close)
	ue := e.(*userspaceEngine)

	routerCfg := &router.Config{}
	peerHexes := []string{
		"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
		"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
	}
	for _, peerHex := range peerHexes {
		// Network map advertising the one peer.
		peerNode := &tailcfg.Node{
			ID:  1,
			Key: nkFromHex(peerHex),
		}
		nm := &netmap.NetworkMap{
			Peers: nodeViews([]*tailcfg.Node{peerNode}),
		}

		peerKey, err := key.ParseNodePublicUntyped(mem.S(peerHex))
		if err != nil {
			t.Fatal(err)
		}

		// WireGuard config for the same peer.
		peer := wgcfg.Peer{
			PublicKey: peerKey,
			AllowedIPs: []netip.Prefix{
				netip.PrefixFrom(netaddr.IPv4(100, 100, 99, 1), 32),
			},
		}
		cfg := &wgcfg.Config{Peers: []wgcfg.Peer{peer}}

		e.SetNetworkMap(nm)
		if err = e.Reconfig(cfg, routerCfg, &dns.Config{}); err != nil {
			t.Fatal(err)
		}

		// The peer must be tracked with a zero activity time...
		wantRecvAt := map[key.NodePublic]mono.Time{
			nkFromHex(peerHex): 0,
		}
		if got := ue.recvActivityAt; !reflect.DeepEqual(got, wantRecvAt) {
			t.Errorf("wrong recvActivityAt\n got: %v\nwant: %v\n", got, wantRecvAt)
		}

		// ...and must be recorded as trimmed.
		wantTrimmed := map[key.NodePublic]bool{
			nkFromHex(peerHex): true,
		}
		if got := ue.trimmedNodes; !reflect.DeepEqual(got, wantTrimmed) {
			t.Errorf("wrong wantTrimmedNodes\n got: %v\nwant: %v\n", got, wantTrimmed)
		}
	}
}
// TestUserspaceEngineTSMPLearned checks which peers the engine reports through
// testDiscoChangedHook when Reconfig supplies a disco key that differs from
// the one in the network map, with and without a preceding PatchDiscoKey call
// for that same new key.
func TestUserspaceEngineTSMPLearned(t *testing.T) {
	bus := eventbustest.NewBus(t)
	tracker := health.NewTracker(bus)
	metrics := new(usermetric.Registry)
	e, err := NewFakeUserspaceEngine(t.Logf, 0, tracker, metrics, bus)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(e.Close)
	ue := e.(*userspaceEngine)

	// The hook hands each Reconfig's disco-changed set back to the test.
	changed := make(chan map[key.NodePublic]bool, 1)
	ue.testDiscoChangedHook = func(m map[key.NodePublic]bool) {
		changed <- m
	}

	routerCfg := &router.Config{}
	const peerHex = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"

	steps := []struct {
		tsmp  bool // patch the new disco key in before Reconfig
		inMap bool // whether the peer should show up in the changed set
	}{
		{tsmp: false, inMap: false},
		{tsmp: true, inMap: false},
		{tsmp: false, inMap: true},
	}
	for _, step := range steps {
		// The network map carries the "old" disco key for the peer.
		mapDisco := key.NewDisco()
		nm := &netmap.NetworkMap{
			Peers: nodeViews([]*tailcfg.Node{{
				ID:       1,
				Key:      nkFromHex(peerHex),
				DiscoKey: mapDisco.Public(),
			}}),
		}

		peerKey, err := key.ParseNodePublicUntyped(mem.S(peerHex))
		if err != nil {
			t.Fatal(err)
		}
		e.SetNetworkMap(nm)

		// The WireGuard config carries a fresh disco key.
		cfgDisco := key.NewDisco()
		cfg := &wgcfg.Config{
			Peers: []wgcfg.Peer{{
				PublicKey: peerKey,
				DiscoKey:  cfgDisco.Public(),
			}},
		}

		if step.tsmp {
			ue.PatchDiscoKey(peerKey, cfgDisco.Public())
		}

		if err = e.Reconfig(cfg, routerCfg, &dns.Config{}); err != nil {
			t.Fatal(err)
		}

		got := <-changed
		_, present := got[peerKey]
		if present != step.inMap {
			t.Fatalf("expect key %v in map %v to be %t, got %t", peerKey, got,
				step.inMap, present)
		}
	}
}
// TestUserspaceEngineTSMPLearnedMismatch covers the case where PatchDiscoKey
// is called with a disco key that differs from the one later given to
// Reconfig. For every step it checks whether the peer appears in the set
// passed to testDiscoChangedHook, and for the mismatching step it checks that
// metricTSMPLearnedKeyMismatch increased by exactly one.
func TestUserspaceEngineTSMPLearnedMismatch(t *testing.T) {
	bus := eventbustest.NewBus(t)

	ht := health.NewTracker(bus)
	reg := new(usermetric.Registry)
	e, err := NewFakeUserspaceEngine(t.Logf, 0, ht, reg, bus)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(e.Close)
	ue := e.(*userspaceEngine)

	// The hook hands each Reconfig's disco-changed set back to the test.
	discoChangedChan := make(chan map[key.NodePublic]bool, 1)
	ue.testDiscoChangedHook = func(m map[key.NodePublic]bool) {
		discoChangedChan <- m
	}

	routerCfg := &router.Config{}

	// Start from the metric's current value instead of assuming zero, so the
	// per-step delta stays correct even if the counter was already
	// incremented before this test ran.
	metricValue := metricTSMPLearnedKeyMismatch.Value()

	keyChanges := []struct {
		tsmp     bool // call PatchDiscoKey before Reconfig
		inMap    bool // whether the peer should show up in the changed set
		wrongKey bool // patch in a key other than the one Reconfig will use
	}{
		{tsmp: false, inMap: false, wrongKey: false},
		{tsmp: true, inMap: false, wrongKey: true},
		{tsmp: false, inMap: false, wrongKey: false},
	}

	nkHex := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
	for _, change := range keyChanges {
		// The network map carries the "old" disco key for the peer.
		oldDisco := key.NewDisco()
		nm := &netmap.NetworkMap{
			Peers: nodeViews([]*tailcfg.Node{
				{
					ID:       1,
					Key:      nkFromHex(nkHex),
					DiscoKey: oldDisco.Public(),
				},
			}),
		}
		nk, err := key.ParseNodePublicUntyped(mem.S(nkHex))
		if err != nil {
			t.Fatal(err)
		}
		e.SetNetworkMap(nm)

		// The WireGuard config carries a fresh disco key.
		newDisco := key.NewDisco()
		cfg := &wgcfg.Config{
			Peers: []wgcfg.Peer{
				{
					PublicKey: nk,
					DiscoKey:  newDisco.Public(),
				},
			},
		}

		tsmpKey := newDisco.Public()
		if change.tsmp {
			if change.wrongKey {
				// Deliberately patch in an unrelated key.
				tsmpKey = key.NewDisco().Public()
			}
			ue.PatchDiscoKey(nk, tsmpKey)
		}

		err = e.Reconfig(cfg, routerCfg, &dns.Config{})
		if err != nil {
			t.Fatal(err)
		}

		changeMap := <-discoChangedChan

		if _, ok := changeMap[nk]; ok != change.inMap {
			t.Fatalf("expect key %v in map %v to be %t, got %t", nk, changeMap,
				change.inMap, ok)
		}

		// Compare the metric against its value after the previous step.
		metric := metricTSMPLearnedKeyMismatch.Value()
		delta := metric - metricValue
		metricValue = metric
		if change.wrongKey && delta != 1 {
			t.Fatalf("expected a delta of 1, got %d", delta)
		}
	}
}
// TestUserspaceEnginePortReconfig checks that toggling the
// RandomizeClientPort control knob and calling Reconfig changes the local
// port reported by magicConn, and that Reconfig alone leaves it unchanged.
func TestUserspaceEnginePortReconfig(t *testing.T) {
	flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/2855")
	const defaultPort = 49983

	var knobs controlknobs.Knobs

	bus := eventbustest.NewBus(t)

	// Keep making a wgengine until we find an unused port
	var ue *userspaceEngine
	ht := health.NewTracker(bus)
	reg := new(usermetric.Registry)
	for range 100 {
		attempt := uint16(defaultPort + rand.Intn(1000))
		e, err := NewFakeUserspaceEngine(t.Logf, attempt, &knobs, ht, reg, bus)
		if err != nil {
			t.Fatal(err)
		}
		ue = e.(*userspaceEngine)
		if ue.magicConn.LocalPort() == attempt {
			break
		}
		// The engine did not get the requested port; discard it and retry.
		ue.Close()
		ue = nil
	}
	if ue == nil {
		t.Fatal("could not create a wgengine with a specific port")
	}
	t.Cleanup(ue.Close)

	startingPort := ue.magicConn.LocalPort()
	nodeKey, err := key.ParseNodePublicUntyped(mem.S("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
	if err != nil {
		t.Fatal(err)
	}
	cfg := &wgcfg.Config{
		Peers: []wgcfg.Peer{
			{
				PublicKey: nodeKey,
				AllowedIPs: []netip.Prefix{
					netip.PrefixFrom(netaddr.IPv4(100, 100, 99, 1), 32),
				},
			},
		},
	}
	routerCfg := &router.Config{}

	// With the knob untouched, Reconfig must keep the port.
	if err := ue.Reconfig(cfg, routerCfg, &dns.Config{}); err != nil {
		t.Fatal(err)
	}
	if got := ue.magicConn.LocalPort(); got != startingPort {
		t.Errorf("no debug setting changed local port to %d from %d", got, startingPort)
	}

	// Turning randomization on must move the engine to a different port.
	knobs.RandomizeClientPort.Store(true)
	if err := ue.Reconfig(cfg, routerCfg, &dns.Config{}); err != nil {
		t.Fatal(err)
	}
	if got := ue.magicConn.LocalPort(); got == startingPort {
		t.Errorf("debug setting did not change local port from %d", startingPort)
	}

	// Turning randomization back off must move it again.
	lastPort := ue.magicConn.LocalPort()
	knobs.RandomizeClientPort.Store(false)
	if err := ue.Reconfig(cfg, routerCfg, &dns.Config{}); err != nil {
		t.Fatal(err)
	}
	if startingPort == defaultPort {
		// Only try this if we managed to bind defaultPort the first time.
		// Otherwise, assume someone else on the computer is using defaultPort
		// and so Reconfig would have caused magicsock to bind some other port.
		if got := ue.magicConn.LocalPort(); got != defaultPort {
			// Report the port actually observed: in this branch startingPort
			// equals defaultPort, so printing those two says nothing.
			t.Errorf("disabling port randomization left local port at %d; want %d (was %d before)", got, defaultPort, lastPort)
		}
	}
	if got := ue.magicConn.LocalPort(); got == lastPort {
		t.Errorf("Reconfig did not change local port from %d", lastPort)
	}
}
// Test that enabling and disabling peer path MTU discovery works correctly.
//
// Each table entry stores a value in the PeerMTUEnable control knob, calls
// Reconfig, and then checks both magicConn's peer-MTU state and the
// don't-fragment setting it reports.
func TestUserspaceEnginePeerMTUReconfig(t *testing.T) {
	if runtime.GOOS != "linux" && runtime.GOOS != "darwin" {
		t.Skipf("skipping on %q; peer MTU not supported", runtime.GOOS)
	}

	// The deferred calls put back whatever value each variable held when the
	// test started (os.Getenv is evaluated at the defer statement).
	defer os.Setenv("TS_DEBUG_ENABLE_PMTUD", os.Getenv("TS_DEBUG_ENABLE_PMTUD"))
	envknob.Setenv("TS_DEBUG_ENABLE_PMTUD", "")
	// Turn on debugging to help diagnose problems.
	defer os.Setenv("TS_DEBUG_PMTUD", os.Getenv("TS_DEBUG_PMTUD"))
	envknob.Setenv("TS_DEBUG_PMTUD", "true")

	var knobs controlknobs.Knobs

	bus := eventbustest.NewBus(t)
	ht := health.NewTracker(bus)
	reg := new(usermetric.Registry)
	e, err := NewFakeUserspaceEngine(t.Logf, 0, &knobs, ht, reg, bus)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(e.Close)
	ue := e.(*userspaceEngine)

	if ue.magicConn.PeerMTUEnabled() != false {
		t.Error("peer MTU enabled by default, should not be")
	}
	osDefaultDF, err := ue.magicConn.DontFragSetting()
	if err != nil {
		t.Errorf("get don't fragment bit failed: %v", err)
	}
	t.Logf("Info: OS default don't fragment bit(s) setting: %v", osDefaultDF)

	// Build a set of configs to use as we change the peer MTU settings.
	nodeKey, err := key.ParseNodePublicUntyped(mem.S("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
	if err != nil {
		t.Fatal(err)
	}
	cfg := &wgcfg.Config{
		Peers: []wgcfg.Peer{
			{
				PublicKey: nodeKey,
				AllowedIPs: []netip.Prefix{
					netip.PrefixFrom(netaddr.IPv4(100, 100, 99, 1), 32),
				},
			},
		},
	}
	routerCfg := &router.Config{}

	tests := []struct {
		desc    string   // test description
		wantP   bool     // desired value of PMTUD setting
		wantDF  bool     // desired value of don't fragment bits
		shouldP opt.Bool // if set, force peer MTU to this value
	}{
		{desc: "after_first_reconfig", wantP: false, wantDF: osDefaultDF, shouldP: ""},
		{desc: "enabling_PMTUD_first_time", wantP: true, wantDF: true, shouldP: "true"},
		{desc: "disabling_PMTUD", wantP: false, wantDF: false, shouldP: "false"},
		{desc: "enabling_PMTUD_second_time", wantP: true, wantDF: true, shouldP: "true"},
		{desc: "returning_to_default_PMTUD", wantP: false, wantDF: false, shouldP: ""},
	}

	// The sub-tests share one engine and run in table order; each one starts
	// from the state the previous one left behind.
	for _, tt := range tests {
		t.Run(tt.desc, func(t *testing.T) {
			// An unset shouldP stores false in the knob.
			if v, ok := tt.shouldP.Get(); ok {
				knobs.PeerMTUEnable.Store(v)
			} else {
				knobs.PeerMTUEnable.Store(false)
			}
			if err := ue.Reconfig(cfg, routerCfg, &dns.Config{}); err != nil {
				t.Fatal(err)
			}
			if v := ue.magicConn.PeerMTUEnabled(); v != tt.wantP {
				t.Errorf("peer MTU set to %v, want %v", v, tt.wantP)
			}
			if v, err := ue.magicConn.DontFragSetting(); v != tt.wantDF || err != nil {
				// Report the DF expectation (wantDF), not the PMTUD one.
				t.Errorf("don't fragment bit set to %v, want %v, err %v", v, tt.wantDF, err)
			}
		})
	}
}
// TestTSMPKeyAdvertisement configures a fake engine with one peer and a self
// node, calls sendTSMPDiscoAdvertisement for the peer's address, and checks
// that metricTSMPDiscoKeyAdvertisementSent went up.
func TestTSMPKeyAdvertisement(t *testing.T) {
	var knobs controlknobs.Knobs

	bus := eventbustest.NewBus(t)
	tracker := health.NewTracker(bus)
	metrics := new(usermetric.Registry)
	e, err := NewFakeUserspaceEngine(t.Logf, 0, &knobs, tracker, metrics, bus)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(e.Close)
	ue := e.(*userspaceEngine)

	routerCfg := &router.Config{}
	peerKey := nkFromHex("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")

	// Network map: one peer plus a self node with an IPv4 and an IPv6 address.
	self := &tailcfg.Node{
		StableID:  "TESTCTRL00000001",
		Name:      "test-node.test.ts.net",
		Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32"), netip.MustParsePrefix("fd7a:115c:a1e0:ab12:4843:cd96:0:1/128")},
	}
	nm := &netmap.NetworkMap{
		Peers: nodeViews([]*tailcfg.Node{{
			ID:  1,
			Key: peerKey,
		}}),
		SelfNode: self.View(),
	}

	// WireGuard config routing 100.100.99.1/32 to the peer.
	cfg := &wgcfg.Config{
		Peers: []wgcfg.Peer{{
			PublicKey: peerKey,
			AllowedIPs: []netip.Prefix{
				netip.PrefixFrom(netaddr.IPv4(100, 100, 99, 1), 32),
			},
		}},
	}

	ue.SetNetworkMap(nm)
	if err = ue.Reconfig(cfg, routerCfg, &dns.Config{}); err != nil {
		t.Fatal(err)
	}

	peerAddr := netip.MustParseAddr("100.100.99.1")
	sentBefore := metricTSMPDiscoKeyAdvertisementSent.Value()
	ue.sendTSMPDiscoAdvertisement(peerAddr)
	if val := metricTSMPDiscoKeyAdvertisementSent.Value(); val <= sentBefore {
		errs := metricTSMPDiscoKeyAdvertisementError.Value()
		t.Errorf("Expected 1 disco key advert, got %d, errors %d", val, errs)
	}

	// Remove config to have the engine shut down more consistently
	if err = ue.Reconfig(&wgcfg.Config{}, &router.Config{}, &dns.Config{}); err != nil {
		t.Fatal(err)
	}
}
// nkFromHex parses hex, which must be exactly 64 characters long, into a
// key.NodePublic using key.ParseNodePublicUntyped. It panics if the length is
// wrong or if parsing fails.
func nkFromHex(hex string) key.NodePublic {
	if n := len(hex); n != 64 {
		panic(fmt.Sprintf("%q is len %d; want 64", hex, n))
	}
	parsed, err := key.ParseNodePublicUntyped(mem.S(hex))
	if err == nil {
		return parsed
	}
	panic(fmt.Sprintf("%q is not hex: %v", hex, err))
}
// makeMaybeReconfigInputs builds a maybeReconfigInputs with n peers,
// each with a unique key, disco key, and AllowedIPs entry.
//
// Peer i is given the address 100.64.(i>>8).(i) as a /32 AllowedIPs prefix;
// the same address is placed in TrackIPs and the peer's public key is placed
// in both TrimmedNodes and TrackNodes. The WireGuard config gets a fresh
// private key and an MTU of 1280.
func makeMaybeReconfigInputs(n int) *maybeReconfigInputs {
	var (
		peerList     = make([]wgcfg.Peer, 0, n)
		trimmedSet   = make(map[key.NodePublic]bool, n)
		trackedKeys  = make([]key.NodePublic, 0, n)
		trackedAddrs = make([]netip.Addr, 0, n)
	)
	for i := 0; i < n; i++ {
		pub := key.NewNode().Public()
		addr := netip.AddrFrom4([4]byte{100, 64, byte(i >> 8), byte(i)})

		peerList = append(peerList, wgcfg.Peer{
			PublicKey:  pub,
			DiscoKey:   key.NewDisco().Public(),
			AllowedIPs: []netip.Prefix{netip.PrefixFrom(addr, 32)},
		})
		trimmedSet[pub] = true
		trackedKeys = append(trackedKeys, pub)
		trackedAddrs = append(trackedAddrs, addr)
	}

	wgConf := &wgcfg.Config{
		PrivateKey: key.NewNode(),
		Peers:      peerList,
		MTU:        1280,
	}
	return &maybeReconfigInputs{
		WGConfig:     wgConf,
		TrimmedNodes: trimmedSet,
		TrackNodes:   views.SliceOf(trackedKeys),
		TrackIPs:     views.SliceOf(trackedAddrs),
	}
}
// TestMaybeReconfigInputsEqual checks maybeReconfigInputs.Equal on nil
// receivers and arguments, on identical and cloned values, and on clones
// that differ from the original in exactly one field.
func TestMaybeReconfigInputsEqual(t *testing.T) {
	base := makeMaybeReconfigInputs(100)
	var none *maybeReconfigInputs

	// nil cases
	if !none.Equal(nil) {
		t.Error("nil.Equal(nil) should be true")
	}
	if base.Equal(nil) {
		t.Error("non-nil.Equal(nil) should be false")
	}
	if none.Equal(base) {
		t.Error("nil.Equal(non-nil) should be false")
	}

	// same pointer
	if !base.Equal(base) {
		t.Error("a.Equal(a) should be true")
	}

	// cloned equal value
	if dup := base.Clone(); !base.Equal(dup) {
		t.Error("a.Equal(clone) should be true")
	}

	// Verify that every field in the struct is covered by Equal.
	// Each entry mutates exactly one field of a clone and expects
	// Equal to return false. If a new field is added to
	// maybeReconfigInputs without a corresponding entry here, the
	// field count check below will fail.
	type mutation struct {
		field string
		apply func(c *maybeReconfigInputs)
	}
	mutations := []mutation{
		{
			field: "WGConfig",
			apply: func(c *maybeReconfigInputs) { c.WGConfig.MTU = 9999 },
		},
		{
			field: "TrimmedNodes",
			apply: func(c *maybeReconfigInputs) {
				c.TrimmedNodes[key.NewNode().Public()] = true
			},
		},
		{
			field: "TrackNodes",
			apply: func(c *maybeReconfigInputs) {
				nodes := c.TrackNodes.AsSlice()
				nodes[0] = key.NewNode().Public()
				c.TrackNodes = views.SliceOf(nodes)
			},
		},
		{
			field: "TrackIPs",
			apply: func(c *maybeReconfigInputs) {
				addrs := c.TrackIPs.AsSlice()
				addrs[0] = netip.MustParseAddr("1.2.3.4")
				c.TrackIPs = views.SliceOf(addrs)
			},
		},
	}

	// Ensure we have a mutator for every field.
	numFields := reflect.TypeOf(maybeReconfigInputs{}).NumField()
	if len(mutations) != numFields {
		t.Fatalf("maybeReconfigInputs has %d fields but test covers %d; update the mutators table", numFields, len(mutations))
	}

	for i := range mutations {
		changed := base.Clone()
		mutations[i].apply(changed)
		if base.Equal(changed) {
			t.Errorf("Equal did not detect change in field %s", mutations[i].field)
		}
	}
}
// BenchmarkMaybeReconfigInputsEqual measures maybeReconfigInputs.Equal on a
// value and its clone for several peer counts, reporting allocations.
func BenchmarkMaybeReconfigInputsEqual(b *testing.B) {
	for _, n := range []int{10, 100, 1000, 5000} {
		b.Run(fmt.Sprintf("peers=%d", n), func(b *testing.B) {
			a := makeMaybeReconfigInputs(n)
			o := a.Clone()

			// Keep the result live so the call cannot be optimised away, and
			// so we can confirm afterwards that the equal path was measured.
			var equal bool
			b.ReportAllocs()
			b.ResetTimer()
			for range b.N {
				equal = a.Equal(o)
			}
			if !equal {
				b.Fatal("Equal returned false for a cloned input")
			}
		})
	}
}
// an experiment to see if genLocalAddrFunc was worth it. As of Go
// 1.16, it still very much is. (30-40x faster)
func BenchmarkGenLocalAddrFunc(b *testing.B) {
la1 := netip.MustParseAddr("1.2.3.4")
la2 := netip.MustParseAddr("::4")
lanot := netip.MustParseAddr("5.5.5.5")
var x bool
b.Run("map1", func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
m := map[netip.Addr]bool{
la1: true,
}
for range b.N {
x = m[la1]
x = m[lanot]
}
})
b.Run("map2", func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
m := map[netip.Addr]bool{
la1: true,
la2: true,
}
for range b.N {
x = m[la1]
x = m[lanot]
}
})
b.Run("or1", func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
f := func(t netip.Addr) bool {
return t == la1
}
for range b.N {
x = f(la1)
x = f(lanot)
}
})
b.Run("or2", func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
f := func(t netip.Addr) bool {
return t == la1 || t == la2
}
for range b.N {
x = f(la1)
x = f(lanot)
}
})
b.Logf("x = %v", x)
}