tka/sync: send checkpoints to ensure far-behind nodes can catch up
Previously there was a mismatch between how nodes store AUMs and what the control plane would offer during sync:

- Client compaction: Nodes aggressively compact their TKA state -- they keep the last 24 AUMs, every AUM received in the last two weeks, and then everything from there back to the last checkpoint. Depending on when it compacts, a node may only have ~50 AUMs.

- Exponential sampling: To save bandwidth, the control plane would send a SyncOffer containing ancestors at exponentially increasing intervals (4th, 16th, 64th, 256th...).

If a node has been offline for too long, the exponential sampling skips the node's smaller window. When the SyncOffer and local state are disjoint, the node cannot find a common ancestor to use for synchronisation. It enters a failure loop where it keeps polling for new TKA state, but it cannot catch up and has an increasingly outdated view of the tailnet.

This patch replaces the exponential sampling with a SyncOffer that sends every checkpoint ancestor of the current HEAD. Since every node is guaranteed to keep at least one checkpoint after compaction, we're more likely to have an intersection for the sync process.

This patch also increases `maxSyncHeadIntersectionIter`, which in practice means the control plane will send every checkpoint in the current chain. This means all affected nodes will be able to find an intersection and catch up immediately, without requiring a client update.

It's still possible for a node to be unable to sync, but these edge cases become less likely with this change. (For example, if a node is 1000+ AUMs behind, or if it creates a local branch and then compacts away the intersection with the main chain.)

This patch includes a regression test with synthetic data, and I verified the fix with customer data.

Updates https://github.com/tailscale/corp/issues/40404
Change-Id: I2174011bb23a2b5972f6d1591aadcc016e3cba35
Signed-off-by: Alex Chan <alexc@tailscale.com>
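To make the failure mode concrete: the sampler removed below offers ancestors at depths 4, 16, 64, and 256 behind HEAD. Here is a minimal standalone sketch (illustration only, not code from this commit) whose constants mirror the old `ancestorsSkipStart`, `ancestorsSkipShift`, and `maxSyncHeadIntersectionIter` values visible in the diff:

```go
package main

import "fmt"

func main() {
	// Mirror of the removed sampling loop: a starting skip of 4,
	// shifted left by 2 bits each time an ancestor is emitted.
	skip := uint64(4) // ancestorsSkipStart
	for i := uint64(1); i < 400; i++ { // old maxSyncHeadIntersectionIter
		if i%skip == 0 {
			fmt.Printf("ancestor offered at depth %d behind HEAD\n", i)
			skip <<= 2 // ancestorsSkipShift
		}
	}
	// Prints depths 4, 16, 64, 256. A node that compacted down to ~50
	// recent AUMs plus a checkpoint can easily hold none of these exact
	// AUMs, so the two sides' offers are disjoint and sync stalls.
}
```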
This commit is contained in:
parent a6c5d23742, commit 629dc311b4
@@ -21,7 +21,7 @@ const (
 	maxSyncIter = 2000

 	// Max iterations searching for a head intersection during the sync process.
-	maxSyncHeadIntersectionIter = 400
+	maxSyncHeadIntersectionIter = 1000

 	// Limit on scanning AUM trees, chosen arbitrarily.
 	maxScanIterations = 2000
@@ -33,7 +33,7 @@ type State struct {
 	// possesses a valid DisablementSecret. These values are used during the
 	// Tailnet Lock deactivation process.
 	//
-	// These are safe to share publicly or store in the clear. They cannot be
+	// These are safe to share publicly or store in the clear. They cannot be
 	// used to derive the original DisablementSecret.
 	DisablementValues [][]byte `cbor:"2,keyasint"`
tka/sync.go (35 lines changed)
@@ -60,18 +60,6 @@ func FromSyncOffer(offer SyncOffer) (head string, ancestors []string, err error)
 	return string(headBytes), ancestors, nil
 }

-const (
-	// The starting number of AUMs to skip when listing
-	// ancestors in a SyncOffer.
-	ancestorsSkipStart = 4
-
-	// How many bits to advance the skip count when listing
-	// ancestors in a SyncOffer.
-	//
-	// 2 bits, so (4<<2), so after skipping 4 it skips 16.
-	ancestorsSkipShift = 2
-)
-
 // SyncOffer returns an abbreviated description of the current AUM
 // chain, which can be used to synchronize with another (untrusted)
 // Authority instance.
@@ -92,20 +80,10 @@ func (a *Authority) SyncOffer(storage Chonk) (SyncOffer, error) {
 		Ancestors: make([]AUMHash, 0, 6), // 6 chosen arbitrarily.
 	}

-	// We send some subset of our ancestors to help the remote
-	// find a more-recent 'head intersection'.
-	// The number of AUMs between each ancestor entry gets
-	// exponentially larger.
-	var (
-		skipAmount uint64  = ancestorsSkipStart
-		curs       AUMHash = a.Head()
-	)
-	for i := range uint64(maxSyncHeadIntersectionIter) {
-		if i > 0 && (i%skipAmount) == 0 {
-			out.Ancestors = append(out.Ancestors, curs)
-			skipAmount = skipAmount << ancestorsSkipShift
-		}
-
+	// We send all our checkpoints to help the remote find a
+	// more-recent 'head intersection'.
+	curs := a.Head()
+	for range maxSyncHeadIntersectionIter {
 		parent, err := storage.AUM(curs)
 		if err != nil {
 			if err != os.ErrNotExist {
@@ -118,6 +96,11 @@ func (a *Authority) SyncOffer(storage Chonk) (SyncOffer, error) {
 		if parent.Hash() == oldest {
 			break
 		}
+
+		if parent.MessageKind == AUMCheckpoint {
+			out.Ancestors = append(out.Ancestors, curs)
+		}
+
 		copy(curs[:], parent.PrevAUMHash)
 	}

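For orientation, here is a rough sketch (not part of the commit) of how one direction of a sync round uses these pieces, written as if inside package tka and using only names that appear in this diff; transport and error paths are simplified:

```go
// sketchSyncRound is a hypothetical helper showing one direction of sync:
// the local side learns the AUMs it is missing from the remote side.
func sketchSyncRound(local, remote *Authority, localStore, remoteStore Chonk) error {
	// Each side summarizes its chain. After this change, Ancestors holds
	// every checkpoint behind HEAD (bounded by maxSyncHeadIntersectionIter)
	// rather than an exponential sample.
	localOffer, err := local.SyncOffer(localStore)
	if err != nil {
		return err
	}

	// The remote works out which AUMs the local side lacks, based on the
	// head intersection implied by the offer (see computeSyncIntersection).
	missing, err := remote.MissingAUMs(remoteStore, localOffer)
	if err != nil {
		return err
	}

	// The local side validates and applies them, advancing its head.
	return local.Inform(localStore, missing)
}
```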
tka/sync_test.go (264 lines changed)
@@ -5,10 +5,14 @@ package tka

 import (
 	"bytes"
-	"strconv"
+	"fmt"
+	"strings"
 	"testing"
+	"time"

 	"github.com/google/go-cmp/cmp"
+	"tailscale.com/tstest"
 	"tailscale.com/util/must"
 )

 // getSyncOffer returns a SyncOffer for the given Chonk.
@@ -28,27 +32,113 @@ func getSyncOffer(t *testing.T, storage Chonk) SyncOffer {
 }

 func TestSyncOffer(t *testing.T) {
-	c := newTestchain(t, `
-        A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
-        A10 -> A11 -> A12 -> A13 -> A14 -> A15 -> A16 -> A17 -> A18
-        A18 -> A19 -> A20 -> A21 -> A22 -> A23 -> A24 -> A25
-    `)
-	got := getSyncOffer(t, c.Chonk())
-
-	// A SyncOffer includes a selection of AUMs going backwards in the tree,
-	// progressively skipping more and more each iteration.
-	want := SyncOffer{
-		Head: c.AUMHashes["A25"],
-		Ancestors: []AUMHash{
-			c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart)],
-			c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart<<ancestorsSkipShift)],
-			c.AUMHashes["A1"],
-		},
+	fakeState := &State{
+		Keys:              []Key{{Kind: Key25519, Votes: 1}},
+		DisablementValues: [][]byte{bytes.Repeat([]byte{1}, 32)},
 	}
+	checkpointTemplate := optTemplate("checkpoint", AUM{MessageKind: AUMCheckpoint, State: fakeState})

-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
-	}
+	// If we have a small chain with just a handful of AUMs, the SyncOffer
+	// contains the current HEAD and the first checkpoint.
+	t.Run("short-chain", func(t *testing.T) {
+		c := newTestchain(t, `A1 -> A2 -> A3 -> A4 -> A5`)
+		got := getSyncOffer(t, c.Chonk())
+
+		// A SyncOffer includes the first checkpoint.
+		want := SyncOffer{
+			Head: c.AUMHashes["A5"],
+			Ancestors: []AUMHash{
+				c.AUMHashes["A1"],
+			},
+		}
+
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
+		}
+	})
+
+	// If the chain contains multiple checkpoints, the SyncOffer includes
+	// all of them.
+	t.Run("chain-with-multiple-checkpoints", func(t *testing.T) {
+		c := newTestchain(t, `
+            A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
+            A10 -> A11 -> A12 -> A13 -> A14 -> A15 -> A16 -> A17 -> A18
+            A18 -> A19 -> A20 -> A21 -> A22 -> A23 -> A24 -> A25 -> A26
+            A26 -> A27 -> A28 -> A29 -> A30 -> A31 -> A32 -> A33 -> A34
+            A34 -> A35 -> A36 -> A37 -> A38 -> A39 -> A40 -> A41 -> A42
+            A42 -> A43 -> A45 -> A46 -> A47 -> A48 -> A49 -> A50 -> A51
+            A51 -> A52 -> A53 -> A54 -> A55
+
+            A1.template = checkpoint
+            A11.template = checkpoint
+            A21.template = checkpoint
+            A31.template = checkpoint
+            A41.template = checkpoint
+            A51.template = checkpoint
+        `, checkpointTemplate)
+		got := getSyncOffer(t, c.Chonk())
+
+		// A SyncOffer includes every checkpoint, newest first.
+		want := SyncOffer{
+			Head: c.AUMHashes["A55"],
+			Ancestors: []AUMHash{
+				c.AUMHashes["A51"],
+				c.AUMHashes["A41"],
+				c.AUMHashes["A31"],
+				c.AUMHashes["A21"],
+				c.AUMHashes["A11"],
+				c.AUMHashes["A1"],
+			},
+		}
+
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
+		}
+	})
+
+	// The size of a SyncOffer does not grow without bound as the number of AUMs increases.
+	t.Run("long-chain-size-is-bounded", func(t *testing.T) {
+		size := 1800
+
+		// Build a template string with a checkpoint every 50 AUMs.
+		var sb strings.Builder
+		sb.WriteString("A")
+		for i := range size {
+			sb.WriteString(fmt.Sprintf(" -> A%d", i))
+		}
+		for i := range size {
+			if i%50 == 0 {
+				sb.WriteString(fmt.Sprintf("\nA%d.template = checkpoint", i))
+			}
+		}
+
+		c := newTestchain(t, sb.String(), checkpointTemplate)
+		got := getSyncOffer(t, c.Chonk())
+
+		// We expect the SyncOffer to include:
+		//
+		//  - the latest AUM as the HEAD
+		//  - the checkpoints from the last 1000 AUMs (maxSyncHeadIntersectionIter)
+		//  - the oldest AUM in storage
+		//
+		want := SyncOffer{
+			Head: c.AUMHashes["A1799"],
+			Ancestors: []AUMHash{
+				c.AUMHashes["A1750"], c.AUMHashes["A1700"], c.AUMHashes["A1650"],
+				c.AUMHashes["A1600"], c.AUMHashes["A1550"], c.AUMHashes["A1500"],
+				c.AUMHashes["A1450"], c.AUMHashes["A1400"], c.AUMHashes["A1350"],
+				c.AUMHashes["A1300"], c.AUMHashes["A1250"], c.AUMHashes["A1200"],
+				c.AUMHashes["A1150"], c.AUMHashes["A1100"], c.AUMHashes["A1050"],
+				c.AUMHashes["A1000"], c.AUMHashes["A950"], c.AUMHashes["A900"],
+				c.AUMHashes["A850"], c.AUMHashes["A800"], c.AUMHashes["A"],
+			},
+		}
+
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
+		}
+	})
 }

 func TestComputeSyncIntersection_FastForward(t *testing.T) {
@@ -118,35 +208,15 @@ func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {

 	chonk1 := c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "F1")
 	offer1 := getSyncOffer(t, chonk1)
-	want1 := SyncOffer{
-		Head: c.AUMHashes["F1"],
-		Ancestors: []AUMHash{
-			c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)],
-			c.AUMHashes["A1"],
-		},
-	}
-	if diff := cmp.Diff(want1, offer1); diff != "" {
-		t.Errorf("offer1 diff (-want, +got):\n%s", diff)
-	}

 	chonk2 := c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10")
 	offer2 := getSyncOffer(t, chonk2)
-	want2 := SyncOffer{
-		Head: c.AUMHashes["A10"],
-		Ancestors: []AUMHash{
-			c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)],
-			c.AUMHashes["A1"],
-		},
-	}
-	if diff := cmp.Diff(want2, offer2); diff != "" {
-		t.Errorf("offer2 diff (-want, +got):\n%s", diff)
-	}

 	// Node 1 only knows about the first eight nodes, so the head of n2 is
 	// alien to it.
 	t.Run("n1", func(t *testing.T) {
-		// n2 has 10 nodes, so the first common ancestor should be 10-ancestorsSkipStart
-		wantIntersection := c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)]
+		// n2 has 10 nodes, so the first common ancestor is the genesis AUM
+		wantIntersection := c.AUMHashes["A1"]

 		got, err := computeSyncIntersection(chonk1, offer1, offer2)
 		if err != nil {
@@ -162,8 +232,8 @@ func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {

 	// Node 2 knows about the full chain but doesn't recognize the head.
 	t.Run("n2", func(t *testing.T) {
-		// n1 has 9 nodes, so the first common ancestor should be 9-ancestorsSkipStart
-		wantIntersection := c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)]
+		// n1 has 9 nodes, so the first common ancestor is the genesis AUM
+		wantIntersection := c.AUMHashes["A1"]

 		got, err := computeSyncIntersection(chonk2, offer2, offer1)
 		if err != nil {
@@ -354,3 +424,109 @@ func TestSyncSimpleE2E(t *testing.T) {
 		t.Errorf("node & control are not synced: c=%x, n=%x", cHash, nHash)
 	}
 }
+
+// TestSyncFromFarBehind checks that nodes with compacted state can still find
+// a common ancestor when the remote is significantly ahead.
+//
+// We simulate a node that has compacted its early history and is now ~500 AUMs
+// behind the control plane, a distance that previously caused exponential sampling
+// in SyncOffer to skip the node's entire local history.
+//
+// Regression test for http://go/corp/40404
+func TestSyncFromFarBehind(t *testing.T) {
+	pub1, priv1 := testingKey25519(t, 1)
+	pub2, _ := testingKey25519(t, 2)
+	signer1 := signer25519(priv1)
+
+	key1 := Key{Kind: Key25519, Public: pub1, Votes: 2}
+	key2 := Key{Kind: Key25519, Public: pub2, Votes: 2}
+
+	// Setup: persistentAuthority (control plane) vs compactingAuthority (client node).
+	state := State{
+		Keys:              []Key{key1},
+		DisablementValues: [][]byte{DisablementKDF([]byte{1, 2, 3})},
+	}
+
+	persistentStorage, compactingStorage := ChonkMem(), ChonkMem()
+	persistentSize := func() int { return len(must.Get(persistentStorage.AllAUMs())) }
+	compactingSize := func() int { return len(must.Get(compactingStorage.AllAUMs())) }
+
+	// Backdate the clock on the compactingStorage so all AUMs will be old enough
+	// to be considered for compacting.
+	clock := tstest.NewClock(tstest.ClockOpts{
+		Start: time.Now().Add(-(CompactionDefaults.MinAge + 24*time.Hour)),
+	})
+	compactingStorage.SetClock(clock)
+
+	persistentAuthority, genesisAUM := must.Get2(Create(persistentStorage, state, signer1))
+	compactingAuthority := must.Get(Bootstrap(compactingStorage, genesisAUM))
+
+	// 1. Generate enough history to trigger checkpoints.
+	for range checkpointEvery * 2 {
+		update := persistentAuthority.NewUpdater(signer1)
+
+		must.Do(update.AddKey(key2))
+		addKey := must.Get(update.Finalize(persistentStorage))
+		must.Do(persistentAuthority.Inform(persistentStorage, addKey))
+		must.Do(compactingAuthority.Inform(compactingStorage, addKey))
+
+		update = persistentAuthority.NewUpdater(signer1)
+		must.Do(update.RemoveKey(key2.MustID()))
+		removeKey := must.Get(update.Finalize(persistentStorage))
+		must.Do(persistentAuthority.Inform(persistentStorage, removeKey))
+		must.Do(compactingAuthority.Inform(compactingStorage, removeKey))
+	}
+
+	t.Logf("genesis and first batch of AUMs: persistent = %d, compacting = %d", persistentSize(), compactingSize())
+
+	// 2. Compact the node state.
+	//
+	// It now has a different 'oldestAncestor' than the control plane.
+	beforeCompacting := compactingSize()
+	must.Do(compactingAuthority.Compact(compactingStorage, CompactionDefaults))
+	afterCompacting := compactingSize()
+
+	if beforeCompacting == afterCompacting {
+		t.Errorf("expected Compact to reduce the number of AUMs, but unchanged: size = %d", afterCompacting)
+	}
+
+	// 3. Advance the control plane far beyond the node.
+	//
+	// As of 2026-04-17, the largest TKA has ~750 AUMs.
+	//
+	// If you keep increasing this number, eventually the sync will fail because you
+	// hit the hard-coded limits on iteration during the sync process.
+	for persistentSize() < compactingSize()+800 {
+		b := persistentAuthority.NewUpdater(signer1)
+
+		must.Do(b.AddKey(key2))
+		addKey := must.Get(b.Finalize(persistentStorage))
+		must.Do(persistentAuthority.Inform(persistentStorage, addKey))
+
+		b = persistentAuthority.NewUpdater(signer1)
+		must.Do(b.RemoveKey(key2.MustID()))
+		removeKey := must.Get(b.Finalize(persistentStorage))
+		must.Do(persistentAuthority.Inform(persistentStorage, removeKey))
+	}
+
+	t.Logf("post-compacting and extra AUMs: persistent = %d, compacting = %d", persistentSize(), compactingSize())
+
+	// 4. Verify intersection.
+	//
+	// The node should find an intersection even with a 500-AUM gap.
+	persistentOffer := must.Get(persistentAuthority.SyncOffer(persistentStorage))
+	compactingOffer := must.Get(compactingAuthority.SyncOffer(compactingStorage))
+
+	if _, err := compactingAuthority.MissingAUMs(compactingStorage, persistentOffer); err != nil {
+		t.Errorf("node failed to find intersection with far-ahead control plane: %v", err)
+	}
+
+	// 5. Check that the persistent authority can find an intersection with the
+	// compacting authority, and has missing AUMs to send it.
+	missing, err := persistentAuthority.MissingAUMs(persistentStorage, compactingOffer)
+	if err != nil {
+		t.Errorf("control plane failed to find missing AUMs for node: %v", err)
+	}
+	if len(missing) == 0 {
+		t.Errorf("control plane did not find any missing AUMs for node")
+	}
+}