From eb23a645a53663afd3976ea6065129abc26a3cb0 Mon Sep 17 00:00:00 2001
From: Andrew Dunham
Date: Wed, 26 Jul 2023 12:29:18 -0400
Subject: [PATCH] health, ipn/ipnlocal: add healthcheck for state/config

If the user sufficiently breaks their state file such that the local
machine or node keys are zero, the machine ends up in a broken state but
we don't surface this to the user. Add a healthcheck that tracks errors
when these key(s) are zero and surfaces them in 'tailscale status'/etc.

Signed-off-by: Andrew Dunham
Change-Id: Ideffbf9510b09850ebf8daf440fac78109de7812
---
 health/health.go      | 10 ++++++++++
 ipn/ipnlocal/local.go | 22 ++++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/health/health.go b/health/health.go
index bef416272..39d3edd87 100644
--- a/health/health.go
+++ b/health/health.go
@@ -59,6 +59,10 @@ const (
 	// the system, rather than one particular subsystem.
 	SysOverall = Subsystem("overall")
 
+	// SysConfig is the name of the subsystem that tracks health of the
+	// Tailscale state file and configuration.
+	SysConfig = Subsystem("config")
+
 	// SysRouter is the name of the wgengine/router subsystem.
 	SysRouter = Subsystem("router")
 
@@ -203,6 +207,12 @@ func SetTKAHealth(err error) { setErr(SysTKA, err) }
 // TKAHealth returns the tailnet key authority error state.
 func TKAHealth() error { return get(SysTKA) }
 
+// SetConfigHealth sets the health of the configuration.
+func SetConfigHealth(err error) { setErr(SysConfig, err) }
+
+// ConfigHealth returns the configuration error state.
+func ConfigHealth() error { return get(SysConfig) }
+
 // SetLocalLogConfigHealth sets the error state of this client's local log configuration.
 func SetLocalLogConfigHealth(err error) {
 	mu.Lock()
diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go
index 813d01265..7ba8299b6 100644
--- a/ipn/ipnlocal/local.go
+++ b/ipn/ipnlocal/local.go
@@ -3667,6 +3667,27 @@ func (b *LocalBackend) enterStateLockedOnEntry(newState ipn.State) {
 			addrs = append(addrs, addr.Addr().String())
 		}
 		systemd.Status("Connected; %s; %s", activeLogin, strings.Join(addrs, " "))
+
+		var configErrs []error
+		if prefs.Valid() && prefs.Persist().Valid() {
+			pprefs := prefs.Persist()
+			if pprefs.PublicNodeKey().IsZero() {
+				configErrs = append(configErrs, fmt.Errorf("public node key is zero"))
+			}
+			if pprefs.PrivateNodeKey().IsZero() {
+				configErrs = append(configErrs, fmt.Errorf("private node key is zero"))
+			}
+		}
+		b.mu.Lock()
+		if b.machinePrivKey.IsZero() {
+			configErrs = append(configErrs, fmt.Errorf("private machine key is zero"))
+		}
+		b.mu.Unlock()
+
+		if len(configErrs) > 0 {
+			health.SetConfigHealth(multierr.New(configErrs...))
+		}
+
 	case ipn.NoState:
 		// Do nothing.
 	default:
@@ -4920,6 +4941,7 @@ func (b *LocalBackend) resetForProfileChangeLockedOnEntry() error {
 	b.serveConfig = ipn.ServeConfigView{}
 	b.enterStateLockedOnEntry(ipn.NoState) // Reset state.
 	health.SetLocalLogConfigHealth(nil)
+	health.SetConfigHealth(nil)
 	return b.Start(ipn.Options{})
 }