controlclient: persist node key before sending RegisterRequest

The node key generated in doLogin was only written to persistent
storage after a successful HTTP response. If the process was killed
mid-request, the key was lost and a fresh one generated on restart,
causing each restart to look like a new node to the control server.

Add a PersistState callback to controlclient.Options (stored on
Direct as persistState) that writes the new key directly to the
state store before the HTTP POST. On restart the existing key is
reused, turning N restarts into N retries of the same registration
instead of N new registrations.

Fixes #19149

Signed-off-by: Raj Singh <raj@tailscale.com>
This commit is contained in:
Raj Singh 2026-03-28 00:34:36 -05:00 committed by Raj Singh
parent 156e6ae5cd
commit 2bf0759e58
2 changed files with 32 additions and 0 deletions

View File

@ -79,6 +79,7 @@ type Direct struct {
autoUpdatePub *eventbus.Publisher[AutoUpdate]
controlTimePub *eventbus.Publisher[ControlTime]
getMachinePrivKey func() (key.MachinePrivate, error)
persistState func(persist.PersistView) // or nil; called before RegisterRequest to persist new node key
debugFlags []string
pinger Pinger
popBrowser func(url string) // or nil
@ -176,6 +177,12 @@ type Options struct {
// attempted. It is used to allow the client to clean up any resources or complete any
// tasks that are dependent on a live client.
Shutdown func()
// PersistState, if non-nil, is called with an updated Persist after
// generating a new node key but before sending the RegisterRequest.
// This allows the caller to write the key to durable storage so that
// a crash during registration doesn't lose the key.
PersistState func(persist.PersistView)
}
// ControlDialPlanner is the interface optionally supplied when creating a
@ -322,6 +329,7 @@ func NewDirect(opts Options) (*Direct, error) {
dialer: opts.Dialer,
dnsCache: dnsCache,
dialPlan: opts.DialPlan,
persistState: opts.PersistState,
}
c.discoPubKey = opts.DiscoPublicKey
c.closedCtx, c.closeCtx = context.WithCancel(context.Background())
@ -646,6 +654,14 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new
persist.NetworkLockKey = key.NewNLPrivate()
}
// Persist the new key before the RegisterRequest so a crash
// mid-HTTP doesn't lose it.
if c.persistState != nil && !tryingNewKey.IsZero() && (persist.PrivateNodeKey.IsZero() || tryingNewKey.Public() != persist.PrivateNodeKey.Public()) {
preHTTPPersist := persist
preHTTPPersist.PrivateNodeKey = tryingNewKey
c.persistState(preHTTPPersist.View())
}
nlPub := persist.NetworkLockKey.Public()
if tryingNewKey.IsZero() {

View File

@ -2629,6 +2629,7 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
// but it won't take effect until the next Start.
cc, err := b.getNewControlClientFuncLocked()(controlclient.Options{
GetMachinePrivateKey: b.createGetMachinePrivateKeyFunc(),
PersistState: b.createPersistStateFunc(),
Logf: logger.WithPrefix(b.logf, "control: "),
Persist: *persistv,
ServerURL: serverURL,
@ -3630,6 +3631,21 @@ func (b *LocalBackend) createGetMachinePrivateKeyFunc() func() (key.MachinePriva
}
}
// createPersistStateFunc builds the callback handed to the control client for
// saving a Persist to the state store.
//
// The returned callback takes b.mu, copies the current prefs, swaps in the
// supplied Persist, and writes the result under the current profile's key via
// writePrefsToStore. It deliberately goes straight to the store instead of
// through setPrefsNoPermCheck, so that no hooks are triggered for state the
// control server has not yet confirmed. A failed write is logged and otherwise
// ignored.
func (b *LocalBackend) createPersistStateFunc() func(persist.PersistView) {
	persistFn := func(pv persist.PersistView) {
		b.mu.Lock()
		defer b.mu.Unlock()

		// Work on a private copy of the prefs; only the Persist field is
		// replaced before writing.
		updated := b.pm.CurrentPrefs().AsStruct()
		updated.Persist = pv.AsStruct()

		profileKey := b.pm.currentProfile.Key()
		err := b.pm.writePrefsToStore(profileKey, updated.View())
		if err == nil {
			return
		}
		b.logf("persist node key before register: %v", err)
	}
	return persistFn
}
// initMachineKeyLocked is called to initialize b.machinePrivKey.
//
// b.prefs must already be initialized.