diff --git a/cmd/tailscaled/depaware-min.txt b/cmd/tailscaled/depaware-min.txt index e750f86e6..3c111470f 100644 --- a/cmd/tailscaled/depaware-min.txt +++ b/cmd/tailscaled/depaware-min.txt @@ -69,7 +69,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/ipn/ipnstate from tailscale.com/control/controlclient+ tailscale.com/ipn/localapi from tailscale.com/ipn/ipnserver tailscale.com/ipn/store from tailscale.com/cmd/tailscaled - tailscale.com/ipn/store/mem from tailscale.com/ipn/store + tailscale.com/ipn/store/mem from tailscale.com/ipn/store+ tailscale.com/kube/kubetypes from tailscale.com/envknob tailscale.com/log/filelogger from tailscale.com/logpolicy tailscale.com/log/sockstatlog from tailscale.com/ipn/ipnlocal diff --git a/cmd/tailscaled/depaware-minbox.txt b/cmd/tailscaled/depaware-minbox.txt index 17f1a22b2..40a1fb2a4 100644 --- a/cmd/tailscaled/depaware-minbox.txt +++ b/cmd/tailscaled/depaware-minbox.txt @@ -92,7 +92,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/ipn/ipnstate from tailscale.com/control/controlclient+ tailscale.com/ipn/localapi from tailscale.com/ipn/ipnserver tailscale.com/ipn/store from tailscale.com/cmd/tailscaled - tailscale.com/ipn/store/mem from tailscale.com/ipn/store + tailscale.com/ipn/store/mem from tailscale.com/ipn/store+ tailscale.com/kube/kubetypes from tailscale.com/envknob tailscale.com/licenses from tailscale.com/cmd/tailscale/cli tailscale.com/log/filelogger from tailscale.com/logpolicy diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go index f14cdcff0..d923ca1ed 100644 --- a/cmd/tailscaled/tailscaled.go +++ b/cmd/tailscaled/tailscaled.go @@ -33,12 +33,14 @@ import ( "tailscale.com/feature" "tailscale.com/feature/buildfeatures" _ "tailscale.com/feature/condregister" + "tailscale.com/health" "tailscale.com/hostinfo" "tailscale.com/ipn" "tailscale.com/ipn/conffile" "tailscale.com/ipn/ipnlocal" "tailscale.com/ipn/ipnserver" "tailscale.com/ipn/store" + "tailscale.com/ipn/store/mem" "tailscale.com/logpolicy" "tailscale.com/logtail" "tailscale.com/net/dns" @@ -644,7 +646,16 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID store, err := store.New(logf, statePathOrDefault()) if err != nil { - return nil, fmt.Errorf("store.New: %w", err) + // If we can't create the store (for example if it's TPM-sealed and the + // TPM is reset), create a dummy in-memory store to propagate the error + // to the user. + ht, ok := sys.HealthTracker.GetOK() + if !ok { + return nil, fmt.Errorf("store.New: %w", err) + } + logf("store.New failed: %v; starting with in-memory store with a health warning", err) + store = new(mem.Store) + ht.SetUnhealthy(ipn.StateStoreHealth, health.Args{health.ArgError: err.Error()}) } sys.Set(store) diff --git a/cmd/tailscaled/tailscaled_test.go b/cmd/tailscaled/tailscaled_test.go index c50c23759..1188ad35f 100644 --- a/cmd/tailscaled/tailscaled_test.go +++ b/cmd/tailscaled/tailscaled_test.go @@ -4,9 +4,17 @@ package main // import "tailscale.com/cmd/tailscaled" import ( + "os" + "strings" "testing" + "tailscale.com/envknob" + "tailscale.com/ipn" + "tailscale.com/net/netmon" + "tailscale.com/tsd" "tailscale.com/tstest/deptest" + "tailscale.com/types/logid" + "tailscale.com/util/must" ) func TestNothing(t *testing.T) { @@ -38,3 +46,45 @@ func TestDeps(t *testing.T) { }, }.Check(t) } + +func TestStateStoreError(t *testing.T) { + logID, err := logid.NewPrivateID() + if err != nil { + t.Fatal(err) + } + // Don't upload any logs from tests. + envknob.SetNoLogsNoSupport() + + args.statedir = t.TempDir() + args.tunname = "userspace-networking" + + t.Run("new state", func(t *testing.T) { + sys := tsd.NewSystem() + sys.NetMon.Set(must.Get(netmon.New(sys.Bus.Get(), t.Logf))) + lb, err := getLocalBackend(t.Context(), t.Logf, logID.Public(), sys) + if err != nil { + t.Fatal(err) + } + defer lb.Shutdown() + if lb.HealthTracker().IsUnhealthy(ipn.StateStoreHealth) { + t.Errorf("StateStoreHealth is unhealthy on fresh LocalBackend:\n%s", strings.Join(lb.HealthTracker().Strings(), "\n")) + } + }) + t.Run("corrupt state", func(t *testing.T) { + sys := tsd.NewSystem() + sys.NetMon.Set(must.Get(netmon.New(sys.Bus.Get(), t.Logf))) + // Populate the state file with something that will fail to parse to + // trigger an error from store.New. + if err := os.WriteFile(statePathOrDefault(), []byte("bad json"), 0644); err != nil { + t.Fatal(err) + } + lb, err := getLocalBackend(t.Context(), t.Logf, logID.Public(), sys) + if err != nil { + t.Fatal(err) + } + defer lb.Shutdown() + if !lb.HealthTracker().IsUnhealthy(ipn.StateStoreHealth) { + t.Errorf("StateStoreHealth is healthy when state file is corrupt") + } + }) +} diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 0ff299399..72b230327 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -3747,6 +3747,9 @@ func (b *LocalBackend) StartLoginInteractive(ctx context.Context) error { // the control plane sends us one. Otherwise, the notification will be delivered to all // active [watchSession]s. func (b *LocalBackend) StartLoginInteractiveAs(ctx context.Context, user ipnauth.Actor) error { + if b.health.IsUnhealthy(ipn.StateStoreHealth) { + return errors.New("cannot log in when state store is unhealthy") + } b.mu.Lock() defer b.mu.Unlock() if b.cc == nil { @@ -5677,6 +5680,9 @@ func (b *LocalBackend) NodeKey() key.NodePublic { // // b.mu must be held func (b *LocalBackend) nextStateLocked() ipn.State { + if b.health.IsUnhealthy(ipn.StateStoreHealth) { + return ipn.NoState + } var ( cc = b.cc cn = b.currentNode() @@ -6936,6 +6942,9 @@ func (b *LocalBackend) CurrentProfile() ipn.LoginProfileView { // NewProfile creates and switches to the new profile. func (b *LocalBackend) NewProfile() error { + if b.health.IsUnhealthy(ipn.StateStoreHealth) { + return errors.New("cannot log in when state store is unhealthy") + } b.mu.Lock() defer b.mu.Unlock() diff --git a/ipn/localapi/localapi.go b/ipn/localapi/localapi.go index d3503d302..7f249fe53 100644 --- a/ipn/localapi/localapi.go +++ b/ipn/localapi/localapi.go @@ -930,7 +930,10 @@ func (h *Handler) serveLoginInteractive(w http.ResponseWriter, r *http.Request) http.Error(w, "want POST", http.StatusBadRequest) return } - h.b.StartLoginInteractiveAs(r.Context(), h.Actor) + if err := h.b.StartLoginInteractiveAs(r.Context(), h.Actor); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } w.WriteHeader(http.StatusNoContent) return } @@ -949,6 +952,11 @@ func (h *Handler) serveStart(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return } + + if h.b.HealthTracker().IsUnhealthy(ipn.StateStoreHealth) { + http.Error(w, "cannot start backend when state store is unhealthy", http.StatusInternalServerError) + return + } err := h.b.Start(o) if err != nil { // TODO(bradfitz): map error to a good HTTP error diff --git a/ipn/localapi/localapi_test.go b/ipn/localapi/localapi_test.go index 6bb9b5182..5d228ffd6 100644 --- a/ipn/localapi/localapi_test.go +++ b/ipn/localapi/localapi_test.go @@ -25,9 +25,11 @@ import ( "testing" "tailscale.com/client/tailscale/apitype" + "tailscale.com/health" "tailscale.com/ipn" "tailscale.com/ipn/ipnauth" "tailscale.com/ipn/ipnlocal" + "tailscale.com/ipn/ipnstate" "tailscale.com/ipn/store/mem" "tailscale.com/tailcfg" "tailscale.com/tsd" @@ -428,3 +430,73 @@ func TestKeepItSorted(t *testing.T) { } } } + +func TestServeWithUnhealthyState(t *testing.T) { + tstest.Replace(t, &validLocalHostForTesting, true) + h := &Handler{ + PermitRead: true, + PermitWrite: true, + b: newTestLocalBackend(t), + logf: t.Logf, + } + h.b.HealthTracker().SetUnhealthy(ipn.StateStoreHealth, health.Args{health.ArgError: "testing"}) + if err := h.b.Start(ipn.Options{}); err != nil { + t.Fatal(err) + } + + check500Body := func(wantResp string) func(t *testing.T, code int, resp []byte) { + return func(t *testing.T, code int, resp []byte) { + if code != http.StatusInternalServerError { + t.Errorf("got code: %v, want %v\nresponse: %q", code, http.StatusInternalServerError, resp) + } + if got := strings.TrimSpace(string(resp)); got != wantResp { + t.Errorf("got response: %q, want %q", got, wantResp) + } + } + } + tests := []struct { + desc string + req *http.Request + check func(t *testing.T, code int, resp []byte) + }{ + { + desc: "status", + req: httptest.NewRequest("GET", "http://localhost:1234/localapi/v0/status", nil), + check: func(t *testing.T, code int, resp []byte) { + if code != http.StatusOK { + t.Errorf("got code: %v, want %v\nresponse: %q", code, http.StatusOK, resp) + } + var status ipnstate.Status + if err := json.Unmarshal(resp, &status); err != nil { + t.Fatal(err) + } + if status.BackendState != "NoState" { + t.Errorf("got backend state: %q, want %q", status.BackendState, "NoState") + } + }, + }, + { + desc: "login-interactive", + req: httptest.NewRequest("POST", "http://localhost:1234/localapi/v0/login-interactive", nil), + check: check500Body("cannot log in when state store is unhealthy"), + }, + { + desc: "start", + req: httptest.NewRequest("POST", "http://localhost:1234/localapi/v0/start", strings.NewReader("{}")), + check: check500Body("cannot start backend when state store is unhealthy"), + }, + { + desc: "new-profile", + req: httptest.NewRequest("PUT", "http://localhost:1234/localapi/v0/profiles/", nil), + check: check500Body("cannot log in when state store is unhealthy"), + }, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + resp := httptest.NewRecorder() + h.ServeHTTP(resp, tt.req) + tt.check(t, resp.Code, resp.Body.Bytes()) + }) + } +} diff --git a/ipn/store.go b/ipn/store.go index 9da5288c0..2034ae09a 100644 --- a/ipn/store.go +++ b/ipn/store.go @@ -10,6 +10,8 @@ import ( "fmt" "net" "strconv" + + "tailscale.com/health" ) // ErrStateNotExist is returned by StateStore.ReadState when the @@ -60,6 +62,19 @@ const ( TaildropReceivedKey = StateKey("_taildrop-received") ) +// StateStoreHealth is a Warnable set when store.New fails at startup. If +// unhealthy, we block all login attempts and return a health message in status +// responses. +var StateStoreHealth = health.Register(&health.Warnable{ + Code: "state-store-health", + Severity: health.SeverityHigh, + Title: "Tailscale state store failed to initialize", + Text: func(args health.Args) string { + return fmt.Sprintf("State store failed to initialize, Tailscale will not work until this is resolved. See https://tailscale.com/s/state-store-init-error. Error: %s", args[health.ArgError]) + }, + ImpactsConnectivity: true, +}) + // CurrentProfileID returns the StateKey that stores the // current profile ID. The value is a JSON-encoded LoginProfile. // If the userID is empty, the key returned is CurrentProfileStateKey, diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index 9d75cfc29..543dc125c 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -22,6 +22,7 @@ import ( "path/filepath" "regexp" "runtime" + "slices" "strconv" "strings" "sync/atomic" @@ -36,6 +37,7 @@ import ( "tailscale.com/cmd/testwrapper/flakytest" "tailscale.com/feature" _ "tailscale.com/feature/clientupdate" + "tailscale.com/health" "tailscale.com/hostinfo" "tailscale.com/ipn" "tailscale.com/net/tsaddr" @@ -2246,3 +2248,38 @@ func TestNetworkLock(t *testing.T) { } }) } + +func TestNodeWithBadStateFile(t *testing.T) { + tstest.Shard(t) + tstest.Parallel(t) + env := NewTestEnv(t) + n1 := NewTestNode(t, env) + if err := os.WriteFile(n1.stateFile, []byte("bad json"), 0644); err != nil { + t.Fatal(err) + } + + d1 := n1.StartDaemon() + n1.AwaitResponding() + + // Make sure the health message shows up in status output. + n1.AwaitBackendState("NoState") + st := n1.MustStatus() + wantHealth := ipn.StateStoreHealth.Text(health.Args{health.ArgError: ""}) + if !slices.ContainsFunc(st.Health, func(m string) bool { return strings.HasPrefix(m, wantHealth) }) { + t.Errorf("Status does not contain expected health message %q\ngot health messages: %q", wantHealth, st.Health) + } + + // Make sure login attempts are rejected. + cmd := n1.Tailscale("up", "--login-server="+n1.env.ControlURL()) + t.Logf("Running %v ...", cmd) + out, err := cmd.CombinedOutput() + if err == nil { + t.Fatalf("up succeeded with output %q", out) + } + wantOut := "cannot start backend when state store is unhealthy" + if !strings.Contains(string(out), wantOut) { + t.Fatalf("got up output:\n%s\nwant:\n%s", string(out), wantOut) + } + + d1.MustCleanShutdown(t) +} diff --git a/tstest/integration/tailscaled_deps_test_darwin.go b/tstest/integration/tailscaled_deps_test_darwin.go index 217188f75..9f92839d8 100644 --- a/tstest/integration/tailscaled_deps_test_darwin.go +++ b/tstest/integration/tailscaled_deps_test_darwin.go @@ -27,6 +27,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/tailscaled_deps_test_freebsd.go b/tstest/integration/tailscaled_deps_test_freebsd.go index 217188f75..9f92839d8 100644 --- a/tstest/integration/tailscaled_deps_test_freebsd.go +++ b/tstest/integration/tailscaled_deps_test_freebsd.go @@ -27,6 +27,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/tailscaled_deps_test_linux.go b/tstest/integration/tailscaled_deps_test_linux.go index 217188f75..9f92839d8 100644 --- a/tstest/integration/tailscaled_deps_test_linux.go +++ b/tstest/integration/tailscaled_deps_test_linux.go @@ -27,6 +27,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/tailscaled_deps_test_openbsd.go b/tstest/integration/tailscaled_deps_test_openbsd.go index 217188f75..9f92839d8 100644 --- a/tstest/integration/tailscaled_deps_test_openbsd.go +++ b/tstest/integration/tailscaled_deps_test_openbsd.go @@ -27,6 +27,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/tailscaled_deps_test_windows.go b/tstest/integration/tailscaled_deps_test_windows.go index f3cd5e75b..82f8097c8 100644 --- a/tstest/integration/tailscaled_deps_test_windows.go +++ b/tstest/integration/tailscaled_deps_test_windows.go @@ -37,6 +37,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns"