tka/sync: send checkpoints to ensure far-behind nodes can catch up

Previously there was a mismatch between how nodes store AUMs and what
the control plane would offer during sync:

- Client compaction: Nodes aggressively compact their TKA state -- they
  keep the last 24 AUMs, every AUM received in the last two weeks, and
  then everything from there back to the last checkpoint. Depending on
  when it compacts, a node may only have ~50 AUMs.
- Exponential sampling: To save bandwidth, the control plane would send
  a SyncOffer containing ancestors at exponentially increasing intervals
  (4th, 16th, 64th, 256th...).

If a node has been offline for too long, the exponential sampling skips
the node's smaller window. When the SyncOffer and local state are disjoint,
the node cannot find a common ancestor to use for synchronisation.
It enters a failure loop where it keeps polling for new TKA state, but
it cannot catch up and has an increasingly-outdated view of the tailnet.

This patch replaces the exponential sampling with a SyncOffer that sends
every checkpoint ancestor of the current HEAD. Since every node is
guaranteed to keep at least one checkpoint after compaction, we're more
likely to have an intersection for the sync process.

This patch also increases `maxSyncHeadIntersectionIter`, which in
practice means the control plane will send every checkpoint in the
current chain. This means all affected nodes will be able to find an
intersection and catch up immediately, without requiring a client update.

It's still possible for a node to be unable to sync, but these edge cases
become less likely with this change. (For example, if a node is 1000+ AUMs
behind, or if it creates a local branch and then compacts away the
intersection with the main chain.)

This patch includes a regression test with synthetic data, and I
verified the fix with customer data.

Updates https://github.com/tailscale/corp/issues/40404

Change-Id: I2174011bb23a2b5972f6d1591aadcc016e3cba35
Signed-off-by: Alex Chan <alexc@tailscale.com>
This commit is contained in:
Alex Chan 2026-04-17 10:56:09 +01:00
parent a6c5d23742
commit 629dc311b4
4 changed files with 231 additions and 72 deletions

View File

@ -21,7 +21,7 @@ const (
maxSyncIter = 2000
// Max iterations searching for a head intersection during the sync process.
maxSyncHeadIntersectionIter = 400
maxSyncHeadIntersectionIter = 1000
// Limit on scanning AUM trees, chosen arbitrarily.
maxScanIterations = 2000

View File

@ -33,7 +33,7 @@ type State struct {
// possesses a valid DisablementSecret. These values are used during the
// Tailnet Lock deactivation process.
//
// These are safe to share publicly or store in the clear. They cannot be
// These are safe to share publicly or store in the clear. They cannot be
// used to derive the original DisablementSecret.
DisablementValues [][]byte `cbor:"2,keyasint"`

View File

@ -60,18 +60,6 @@ func FromSyncOffer(offer SyncOffer) (head string, ancestors []string, err error)
return string(headBytes), ancestors, nil
}
const (
// The starting number of AUMs to skip when listing
// ancestors in a SyncOffer.
ancestorsSkipStart = 4
// How many bits to advance the skip count when listing
// ancestors in a SyncOffer.
//
// 2 bits, so (4<<2), so after skipping 4 it skips 16.
ancestorsSkipShift = 2
)
// SyncOffer returns an abbreviated description of the current AUM
// chain, which can be used to synchronize with another (untrusted)
// Authority instance.
@ -92,20 +80,10 @@ func (a *Authority) SyncOffer(storage Chonk) (SyncOffer, error) {
Ancestors: make([]AUMHash, 0, 6), // 6 chosen arbitrarily.
}
// We send some subset of our ancestors to help the remote
// find a more-recent 'head intersection'.
// The number of AUMs between each ancestor entry gets
// exponentially larger.
var (
skipAmount uint64 = ancestorsSkipStart
curs AUMHash = a.Head()
)
for i := range uint64(maxSyncHeadIntersectionIter) {
if i > 0 && (i%skipAmount) == 0 {
out.Ancestors = append(out.Ancestors, curs)
skipAmount = skipAmount << ancestorsSkipShift
}
// We send all our checkpoints to help the remote find a
// more-recent 'head intersection'.
curs := a.Head()
for range maxSyncHeadIntersectionIter {
parent, err := storage.AUM(curs)
if err != nil {
if err != os.ErrNotExist {
@ -118,6 +96,11 @@ func (a *Authority) SyncOffer(storage Chonk) (SyncOffer, error) {
if parent.Hash() == oldest {
break
}
if parent.MessageKind == AUMCheckpoint {
out.Ancestors = append(out.Ancestors, curs)
}
copy(curs[:], parent.PrevAUMHash)
}

View File

@ -5,10 +5,14 @@ package tka
import (
"bytes"
"strconv"
"fmt"
"strings"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"tailscale.com/tstest"
"tailscale.com/util/must"
)
// getSyncOffer returns a SyncOffer for the given Chonk.
@ -28,27 +32,113 @@ func getSyncOffer(t *testing.T, storage Chonk) SyncOffer {
}
func TestSyncOffer(t *testing.T) {
c := newTestchain(t, `
A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
A10 -> A11 -> A12 -> A13 -> A14 -> A15 -> A16 -> A17 -> A18
A18 -> A19 -> A20 -> A21 -> A22 -> A23 -> A24 -> A25
`)
got := getSyncOffer(t, c.Chonk())
// A SyncOffer includes a selection of AUMs going backwards in the tree,
// progressively skipping more and more each iteration.
want := SyncOffer{
Head: c.AUMHashes["A25"],
Ancestors: []AUMHash{
c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart)],
c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart<<ancestorsSkipShift)],
c.AUMHashes["A1"],
},
fakeState := &State{
Keys: []Key{{Kind: Key25519, Votes: 1}},
DisablementValues: [][]byte{bytes.Repeat([]byte{1}, 32)},
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
}
checkpointTemplate := optTemplate("checkpoint", AUM{MessageKind: AUMCheckpoint, State: fakeState})
// If we have a small chain with just a handful of AUMs, the SyncOffer
// contains the current HEAD and the first checkpoint.
t.Run("short-chain", func(t *testing.T) {
c := newTestchain(t, `A1 -> A2 -> A3 -> A4 -> A5`)
got := getSyncOffer(t, c.Chonk())
// A SyncOffer includes the first checkpoint.
want := SyncOffer{
Head: c.AUMHashes["A5"],
Ancestors: []AUMHash{
c.AUMHashes["A1"],
},
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
}
})
// If the chain contains multiple checkpoints, the SyncOffer includes
// all of them.
t.Run("chain-with-multiple-checkpoints", func(t *testing.T) {
c := newTestchain(t, `
A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
A10 -> A11 -> A12 -> A13 -> A14 -> A15 -> A16 -> A17 -> A18
A18 -> A19 -> A20 -> A21 -> A22 -> A23 -> A24 -> A25 -> A26
A26 -> A27 -> A28 -> A29 -> A30 -> A31 -> A32 -> A33 -> A34
A34 -> A35 -> A36 -> A37 -> A38 -> A39 -> A40 -> A41 -> A42
A42 -> A43 -> A45 -> A46 -> A47 -> A48 -> A49 -> A50 -> A51
A51 -> A52 -> A53 -> A54 -> A55
A1.template = checkpoint
A11.template = checkpoint
A21.template = checkpoint
A31.template = checkpoint
A41.template = checkpoint
A51.template = checkpoint
`, checkpointTemplate)
got := getSyncOffer(t, c.Chonk())
// A SyncOffer includes every checkpoint in the chain.
want := SyncOffer{
Head: c.AUMHashes["A55"],
Ancestors: []AUMHash{
c.AUMHashes["A51"],
c.AUMHashes["A41"],
c.AUMHashes["A31"],
c.AUMHashes["A21"],
c.AUMHashes["A11"],
c.AUMHashes["A1"],
},
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
}
})
// The size of a SyncOffer does not grow without bound as the number of AUMs increases.
t.Run("long-chain-size-is-bounded", func(t *testing.T) {
size := 1800
// Build a template string with a checkpoint every 50 AUMs.
var sb strings.Builder
sb.WriteString("A")
for i := range size {
sb.WriteString(fmt.Sprintf(" -> A%d", i))
}
for i := range size {
if i%50 == 0 {
sb.WriteString(fmt.Sprintf("\nA%d.template = checkpoint", i))
}
}
c := newTestchain(t, sb.String(), checkpointTemplate)
got := getSyncOffer(t, c.Chonk())
// We expect the SyncOffer to include:
//
// - the latest AUM as the HEAD
// - the checkpoints from the last 1000 AUMs (maxSyncHeadIntersectionIter)
// - the oldest AUM in storage
//
want := SyncOffer{
Head: c.AUMHashes["A1799"],
Ancestors: []AUMHash{
c.AUMHashes["A1750"], c.AUMHashes["A1700"], c.AUMHashes["A1650"],
c.AUMHashes["A1600"], c.AUMHashes["A1550"], c.AUMHashes["A1500"],
c.AUMHashes["A1450"], c.AUMHashes["A1400"], c.AUMHashes["A1350"],
c.AUMHashes["A1300"], c.AUMHashes["A1250"], c.AUMHashes["A1200"],
c.AUMHashes["A1150"], c.AUMHashes["A1100"], c.AUMHashes["A1050"],
c.AUMHashes["A1000"], c.AUMHashes["A950"], c.AUMHashes["A900"],
c.AUMHashes["A850"], c.AUMHashes["A800"], c.AUMHashes["A"],
},
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
}
})
}
func TestComputeSyncIntersection_FastForward(t *testing.T) {
@ -118,35 +208,15 @@ func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {
chonk1 := c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "F1")
offer1 := getSyncOffer(t, chonk1)
want1 := SyncOffer{
Head: c.AUMHashes["F1"],
Ancestors: []AUMHash{
c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)],
c.AUMHashes["A1"],
},
}
if diff := cmp.Diff(want1, offer1); diff != "" {
t.Errorf("offer1 diff (-want, +got):\n%s", diff)
}
chonk2 := c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10")
offer2 := getSyncOffer(t, chonk2)
want2 := SyncOffer{
Head: c.AUMHashes["A10"],
Ancestors: []AUMHash{
c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)],
c.AUMHashes["A1"],
},
}
if diff := cmp.Diff(want2, offer2); diff != "" {
t.Errorf("offer2 diff (-want, +got):\n%s", diff)
}
// Node 1 only knows about the first eight nodes, so the head of n2 is
// alien to it.
t.Run("n1", func(t *testing.T) {
// n2 has 10 nodes, so the first common ancestor should be 10-ancestorsSkipStart
wantIntersection := c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)]
// n2 has 10 nodes, so the first common ancestor is the genesis AUM
wantIntersection := c.AUMHashes["A1"]
got, err := computeSyncIntersection(chonk1, offer1, offer2)
if err != nil {
@ -162,8 +232,8 @@ func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {
// Node 2 knows about the full chain but doesn't recognize the head.
t.Run("n2", func(t *testing.T) {
// n1 has 9 nodes, so the first common ancestor should be 9-ancestorsSkipStart
wantIntersection := c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)]
// n1 has 9 nodes, so the first common ancestor is the genesis AUM
wantIntersection := c.AUMHashes["A1"]
got, err := computeSyncIntersection(chonk2, offer2, offer1)
if err != nil {
@ -354,3 +424,109 @@ func TestSyncSimpleE2E(t *testing.T) {
t.Errorf("node & control are not synced: c=%x, n=%x", cHash, nHash)
}
}
// TestSyncFromFarBehind checks that nodes with compacted state can still find
// a common ancestor when the remote is significantly ahead.
//
// We simulate a node that has compacted its early history and has fallen
// several hundred AUMs (~800) behind the control plane, a distance that
// previously caused exponential sampling in SyncOffer to skip the node's
// entire local history.
//
// Regression test for http://go/corp/40404
func TestSyncFromFarBehind(t *testing.T) {
	pub1, priv1 := testingKey25519(t, 1)
	pub2, _ := testingKey25519(t, 2)
	signer1 := signer25519(priv1)

	key1 := Key{Kind: Key25519, Public: pub1, Votes: 2}
	key2 := Key{Kind: Key25519, Public: pub2, Votes: 2}

	// Setup: persistentAuthority (control plane) vs compactingAuthority (client node).
	state := State{
		Keys:              []Key{key1},
		DisablementValues: [][]byte{DisablementKDF([]byte{1, 2, 3})},
	}

	persistentStorage, compactingStorage := ChonkMem(), ChonkMem()
	persistentSize := func() int { return len(must.Get(persistentStorage.AllAUMs())) }
	compactingSize := func() int { return len(must.Get(compactingStorage.AllAUMs())) }

	// Backdate the clock on the compactingStorage so all AUMs will be old enough
	// to be considered for compacting.
	clock := tstest.NewClock(tstest.ClockOpts{
		Start: time.Now().Add(-(CompactionDefaults.MinAge + 24*time.Hour)),
	})
	compactingStorage.SetClock(clock)

	persistentAuthority, genesisAUM := must.Get2(Create(persistentStorage, state, signer1))
	compactingAuthority := must.Get(Bootstrap(compactingStorage, genesisAUM))

	// 1. Generate enough history to trigger checkpoints.
	//
	// Each loop iteration adds two AUMs (an AddKey and a RemoveKey), applied
	// to both authorities so they stay in sync for now.
	for range checkpointEvery * 2 {
		update := persistentAuthority.NewUpdater(signer1)
		must.Do(update.AddKey(key2))
		addKey := must.Get(update.Finalize(persistentStorage))
		must.Do(persistentAuthority.Inform(persistentStorage, addKey))
		must.Do(compactingAuthority.Inform(compactingStorage, addKey))

		update = persistentAuthority.NewUpdater(signer1)
		must.Do(update.RemoveKey(key2.MustID()))
		removeKey := must.Get(update.Finalize(persistentStorage))
		must.Do(persistentAuthority.Inform(persistentStorage, removeKey))
		must.Do(compactingAuthority.Inform(compactingStorage, removeKey))
	}
	t.Logf("genesis and first batch of AUMs: persistent = %d, compacting = %d", persistentSize(), compactingSize())

	// 2. Compact the node state.
	//
	// It now has a different 'oldestAncestor' than the control plane.
	beforeCompacting := compactingSize()
	must.Do(compactingAuthority.Compact(compactingStorage, CompactionDefaults))
	afterCompacting := compactingSize()
	if beforeCompacting == afterCompacting {
		t.Errorf("expected Compact to reduce the number of AUMs, but unchanged: size = %d", afterCompacting)
	}

	// 3. Advance the control plane far beyond the node.
	//
	// As of 2026-04-17, the largest TKA has ~750 AUMs.
	//
	// If you keep increasing this number, eventually the sync will fail because you
	// hit the hard-coded limits on iteration during the sync process.
	for persistentSize() < compactingSize()+800 {
		b := persistentAuthority.NewUpdater(signer1)
		must.Do(b.AddKey(key2))
		addKey := must.Get(b.Finalize(persistentStorage))
		must.Do(persistentAuthority.Inform(persistentStorage, addKey))

		b = persistentAuthority.NewUpdater(signer1)
		must.Do(b.RemoveKey(key2.MustID()))
		removeKey := must.Get(b.Finalize(persistentStorage))
		must.Do(persistentAuthority.Inform(persistentStorage, removeKey))
	}
	t.Logf("post-compacting and extra AUMs: persistent = %d, compacting = %d", persistentSize(), compactingSize())

	// 4. Verify intersection: the node should find a common ancestor even
	// with a multi-hundred-AUM gap.
	persistentOffer := must.Get(persistentAuthority.SyncOffer(persistentStorage))
	compactingOffer := must.Get(compactingAuthority.SyncOffer(compactingStorage))

	if _, err := compactingAuthority.MissingAUMs(compactingStorage, persistentOffer); err != nil {
		t.Errorf("node failed to find intersection with far-ahead control plane: %v", err)
	}

	// 5. Check that the persistent authority can find an intersection with the
	// compacting authority, and has missing AUMs to send it.
	//
	// Check the error before inspecting the result: missing is meaningless
	// (and typically empty) when err is non-nil.
	missing, err := persistentAuthority.MissingAUMs(persistentStorage, compactingOffer)
	if err != nil {
		t.Fatalf("control plane failed to find missing AUMs for node: %v", err)
	}
	if len(missing) == 0 {
		t.Errorf("control plane did not find any missing AUMs for node")
	}
}