diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go index d923ca1ed..203b5a0ac 100644 --- a/cmd/tailscaled/tailscaled.go +++ b/cmd/tailscaled/tailscaled.go @@ -209,7 +209,10 @@ func main() { flag.BoolVar(&args.disableLogs, "no-logs-no-support", false, "disable log uploads; this also disables any technical support") flag.StringVar(&args.confFile, "config", "", "path to config file, or 'vm:user-data' to use the VM's user-data (EC2)") if buildfeatures.HasTPM { - flag.Var(&args.hardwareAttestation, "hardware-attestation", "use hardware-backed keys to bind node identity to this device when supported by the OS and hardware. Uses TPM 2.0 on Linux and Windows; SecureEnclave on macOS and iOS; and Keystore on Android") + flag.Var(&args.hardwareAttestation, "hardware-attestation", `use hardware-backed keys to bind node identity to this device when supported +by the OS and hardware. Uses TPM 2.0 on Linux and Windows; SecureEnclave on +macOS and iOS; and Keystore on Android. Only supported for Tailscale nodes that +store state on filesystem.`) } if f, ok := hookRegisterOutboundProxyFlags.GetOk(); ok { f() @@ -904,17 +907,17 @@ func applyIntegrationTestEnvKnob() { func handleTPMFlags() { switch { case args.hardwareAttestation.v: - if _, err := key.NewEmptyHardwareAttestationKey(); err == key.ErrUnsupported { + if err := canUseHardwareAttestation(); err != nil { log.SetFlags(0) - log.Fatalf("--hardware-attestation is not supported on this platform or in this build of tailscaled") + log.Fatal(err) } case !args.hardwareAttestation.set: - policyHWAttestation, _ := policyclient.Get().GetBoolean(pkey.HardwareAttestation, feature.HardwareAttestationAvailable()) - if !policyHWAttestation { - break - } - if feature.TPMAvailable() { - args.hardwareAttestation.v = true + policyHWAttestation, _ := policyclient.Get().GetBoolean(pkey.HardwareAttestation, false) + if err := canUseHardwareAttestation(); err != nil { + log.Printf("[unexpected] policy requires hardware attestation, but device does not support it: %v", err) + args.hardwareAttestation.v = false + } else { + args.hardwareAttestation.v = policyHWAttestation } } @@ -926,18 +929,46 @@ func handleTPMFlags() { log.Fatal(err) } case !args.encryptState.set: - policyEncrypt, _ := policyclient.Get().GetBoolean(pkey.EncryptState, feature.TPMAvailable()) - if !policyEncrypt { - // Default disabled, no need to validate. - return - } - // Default enabled if available. - if err := canEncryptState(); err == nil { + policyEncrypt, _ := policyclient.Get().GetBoolean(pkey.EncryptState, false) + if err := canEncryptState(); policyEncrypt && err == nil { args.encryptState.v = true } } } +// canUseHardwareAttestation returns an error if hardware attestation can't be +// enabled, either due to availability or compatibility with other settings. +func canUseHardwareAttestation() error { + if _, err := key.NewEmptyHardwareAttestationKey(); err == key.ErrUnsupported { + return errors.New("--hardware-attestation is not supported on this platform or in this build of tailscaled") + } + // Hardware attestation keys are TPM-bound and cannot be migrated between + // machines. Disable when using portable state stores like kube: or arn: + // where state may be loaded on a different machine. + if args.statepath != "" && isPortableStore(args.statepath) { + return errors.New("--hardware-attestation cannot be used with portable state stores (kube:, arn:) because TPM-bound keys cannot be migrated between machines") + } + return nil +} + +// isPortableStore reports whether the given state path refers to a portable +// state store where state may be loaded on different machines. +// All stores apart from file store and TPM store are portable. +func isPortableStore(path string) bool { + if store.HasKnownProviderPrefix(path) && !strings.HasPrefix(path, store.TPMPrefix) { + return true + } + // In most cases Kubernetes Secret and AWS SSM stores would have been caught + // by the earlier check - but that check relies on those stores having been + // registered. This additional check is here to ensure that if we ever + // produce a faulty build that failed to register some store, users who + // upgraded to that don't get hardware keys generated. + if strings.HasPrefix(path, "kube:") || strings.HasPrefix(path, "arn:") { + return true + } + return false +} + // canEncryptState returns an error if state encryption can't be enabled, // either due to availability or compatibility with other settings. func canEncryptState() error { diff --git a/cmd/tailscaled/tailscaled_test.go b/cmd/tailscaled/tailscaled_test.go index 1188ad35f..36327cccc 100644 --- a/cmd/tailscaled/tailscaled_test.go +++ b/cmd/tailscaled/tailscaled_test.go @@ -88,3 +88,56 @@ func TestStateStoreError(t *testing.T) { } }) } + +func TestIsPortableStore(t *testing.T) { + tests := []struct { + name string + path string + want bool + }{ + { + name: "kube_store", + path: "kube:my-secret", + want: true, + }, + { + name: "aws_arn_store", + path: "arn:aws:ssm:us-east-1:123456789012:parameter/tailscale/state", + want: true, + }, + { + name: "tpm_store", + path: "tpmseal:/var/lib/tailscale/tailscaled.state", + want: false, + }, + { + name: "local_file_store", + path: "/var/lib/tailscale/tailscaled.state", + want: false, + }, + { + name: "empty_path", + path: "", + want: false, + }, + { + name: "mem_store", + path: "mem:", + want: true, + }, + { + name: "windows_file_store", + path: `C:\ProgramData\Tailscale\server-state.conf`, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isPortableStore(tt.path) + if got != tt.want { + t.Errorf("isPortableStore(%q) = %v, want %v", tt.path, got, tt.want) + } + }) + } +} diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index a95b7b921..e0d481f23 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -2488,7 +2488,7 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error { // neither UpdatePrefs or reconciliation should change Persist newPrefs.Persist = b.pm.CurrentPrefs().Persist().AsStruct() - if buildfeatures.HasTPM { + if buildfeatures.HasTPM && b.HardwareAttested() { if genKey, ok := feature.HookGenerateAttestationKeyIfEmpty.GetOk(); ok { newKey, err := genKey(newPrefs.Persist, logf) if err != nil { @@ -2500,6 +2500,12 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error { } } } + // Remove any existing attestation key if HardwareAttested is false. + if !b.HardwareAttested() && newPrefs.Persist != nil && newPrefs.Persist.AttestationKey != nil && !newPrefs.Persist.AttestationKey.IsZero() { + newPrefs.Persist.AttestationKey = nil + prefsChanged = true + prefsChangedWhy = append(prefsChangedWhy, "removeAttestationKey") + } if prefsChanged { logf("updated prefs: %v, reason: %v", newPrefs.Pretty(), prefsChangedWhy) diff --git a/ipn/ipnlocal/profiles.go b/ipn/ipnlocal/profiles.go index 40a3c9887..7080e3c3e 100644 --- a/ipn/ipnlocal/profiles.go +++ b/ipn/ipnlocal/profiles.go @@ -5,10 +5,12 @@ package ipnlocal import ( "cmp" + "crypto" "crypto/rand" "encoding/json" "errors" "fmt" + "io" "runtime" "slices" "strings" @@ -59,6 +61,9 @@ type profileManager struct { // extHost is the bridge between [profileManager] and the registered [ipnext.Extension]s. // It may be nil in tests. A nil pointer is a valid, no-op host. extHost *ExtensionHost + + // Override for key.NewEmptyHardwareAttestationKey used for testing. + newEmptyHardwareAttestationKey func() (key.HardwareAttestationKey, error) } // SetExtensionHost sets the [ExtensionHost] for the [profileManager]. @@ -660,13 +665,23 @@ func (pm *profileManager) loadSavedPrefs(k ipn.StateKey) (ipn.PrefsView, error) // if supported by the platform, create an empty hardware attestation key to use when deserializing // to avoid type exceptions from json.Unmarshaling into an interface{}. - hw, _ := key.NewEmptyHardwareAttestationKey() + hw, _ := pm.newEmptyHardwareAttestationKey() savedPrefs.Persist = &persist.Persist{ AttestationKey: hw, } if err := ipn.PrefsFromBytes(bs, savedPrefs); err != nil { - return ipn.PrefsView{}, fmt.Errorf("parsing saved prefs: %v", err) + // Try loading again, this time ignoring the AttestationKey contents. + // If that succeeds, there's something wrong with the underlying + // attestation key mechanism (most likely the TPM changed), but we + // should at least proceed with client startup. + origErr := err + savedPrefs.Persist.AttestationKey = &noopAttestationKey{} + if err := ipn.PrefsFromBytes(bs, savedPrefs); err != nil { + return ipn.PrefsView{}, fmt.Errorf("parsing saved prefs: %w", err) + } else { + pm.logf("failed to parse savedPrefs with attestation key (error: %v) but parsing without the attestation key succeeded; will proceed without using the old attestation key", origErr) + } } pm.logf("using backend prefs for %q: %v", k, savedPrefs.Pretty()) @@ -912,11 +927,12 @@ func newProfileManagerWithGOOS(store ipn.StateStore, logf logger.Logf, ht *healt metricProfileCount.Set(int64(len(knownProfiles))) pm := &profileManager{ - goos: goos, - store: store, - knownProfiles: knownProfiles, - logf: logf, - health: ht, + goos: goos, + store: store, + knownProfiles: knownProfiles, + logf: logf, + health: ht, + newEmptyHardwareAttestationKey: key.NewEmptyHardwareAttestationKey, } var initialProfile ipn.LoginProfileView @@ -985,3 +1001,21 @@ var ( metricMigrationError = clientmetric.NewCounter("profiles_migration_error") metricMigrationSuccess = clientmetric.NewCounter("profiles_migration_success") ) + +// noopAttestationKey is a key.HardwareAttestationKey that always successfully +// unmarshals as a zero key. +type noopAttestationKey struct{} + +func (n noopAttestationKey) Public() crypto.PublicKey { + panic("noopAttestationKey.Public should not be called; missing IsZero check somewhere?") +} + +func (n noopAttestationKey) Sign(rand io.Reader, digest []byte, opts crypto.SignerOpts) (signature []byte, err error) { + panic("noopAttestationKey.Sign should not be called; missing IsZero check somewhere?") +} + +func (n noopAttestationKey) MarshalJSON() ([]byte, error) { return nil, nil } +func (n noopAttestationKey) UnmarshalJSON([]byte) error { return nil } +func (n noopAttestationKey) Close() error { return nil } +func (n noopAttestationKey) Clone() key.HardwareAttestationKey { return n } +func (n noopAttestationKey) IsZero() bool { return true } diff --git a/ipn/ipnlocal/profiles_test.go b/ipn/ipnlocal/profiles_test.go index 95834284e..6be7f0e53 100644 --- a/ipn/ipnlocal/profiles_test.go +++ b/ipn/ipnlocal/profiles_test.go @@ -4,6 +4,7 @@ package ipnlocal import ( + "errors" "fmt" "os/user" "strconv" @@ -1147,3 +1148,40 @@ func TestProfileStateChangeCallback(t *testing.T) { }) } } + +func TestProfileBadAttestationKey(t *testing.T) { + store := new(mem.Store) + pm, err := newProfileManagerWithGOOS(store, t.Logf, health.NewTracker(eventbustest.NewBus(t)), "linux") + if err != nil { + t.Fatal(err) + } + fk := new(failingHardwareAttestationKey) + pm.newEmptyHardwareAttestationKey = func() (key.HardwareAttestationKey, error) { + return fk, nil + } + sk := ipn.StateKey(t.Name()) + if err := pm.store.WriteState(sk, []byte(`{"Config": {"AttestationKey": {}}}`)); err != nil { + t.Fatal(err) + } + prefs, err := pm.loadSavedPrefs(sk) + if err != nil { + t.Fatal(err) + } + ak := prefs.Persist().AsStruct().AttestationKey + if _, ok := ak.(noopAttestationKey); !ok { + t.Errorf("loaded attestation key of type %T, want noopAttestationKey", ak) + } + if !fk.unmarshalCalled { + t.Error("UnmarshalJSON was not called on failingHardwareAttestationKey") + } +} + +type failingHardwareAttestationKey struct { + noopAttestationKey + unmarshalCalled bool +} + +func (k *failingHardwareAttestationKey) UnmarshalJSON([]byte) error { + k.unmarshalCalled = true + return errors.New("failed to unmarshal attestation key!") +} diff --git a/ipn/store/kubestore/store_kube.go b/ipn/store/kubestore/store_kube.go index f48237c05..ba45409ed 100644 --- a/ipn/store/kubestore/store_kube.go +++ b/ipn/store/kubestore/store_kube.go @@ -6,8 +6,8 @@ package kubestore import ( "context" + "encoding/json" "fmt" - "log" "net" "net/http" "os" @@ -57,6 +57,8 @@ type Store struct { certShareMode string // 'ro', 'rw', or empty podName string + logf logger.Logf + // memory holds the latest tailscale state. Writes write state to a kube // Secret and memory, Reads read from memory. memory mem.Store @@ -96,6 +98,7 @@ func newWithClient(logf logger.Logf, c kubeclient.Client, secretName string) (*S canPatch: canPatch, secretName: secretName, podName: os.Getenv("POD_NAME"), + logf: logf, } if envknob.IsCertShareReadWriteMode() { s.certShareMode = "rw" @@ -113,11 +116,11 @@ func newWithClient(logf logger.Logf, c kubeclient.Client, secretName string) (*S if err := s.loadCerts(context.Background(), sel); err != nil { // We will attempt to again retrieve the certs from Secrets when a request for an HTTPS endpoint // is received. - log.Printf("[unexpected] error loading TLS certs: %v", err) + s.logf("[unexpected] error loading TLS certs: %v", err) } } if s.certShareMode == "ro" { - go s.runCertReload(context.Background(), logf) + go s.runCertReload(context.Background()) } return s, nil } @@ -147,7 +150,7 @@ func (s *Store) WriteState(id ipn.StateKey, bs []byte) (err error) { // of a Tailscale Kubernetes node's state Secret. func (s *Store) WriteTLSCertAndKey(domain string, cert, key []byte) (err error) { if s.certShareMode == "ro" { - log.Printf("[unexpected] TLS cert and key write in read-only mode") + s.logf("[unexpected] TLS cert and key write in read-only mode") } if err := dnsname.ValidHostname(domain); err != nil { return fmt.Errorf("invalid domain name %q: %w", domain, err) @@ -258,11 +261,11 @@ func (s *Store) updateSecret(data map[string][]byte, secretName string) (err err defer func() { if err != nil { if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateUpdateFailed, err.Error()); err != nil { - log.Printf("kubestore: error creating tailscaled state update Event: %v", err) + s.logf("kubestore: error creating tailscaled state update Event: %v", err) } } else { if err := s.client.Event(ctx, eventTypeNormal, reasonTailscaleStateUpdated, "Successfully updated tailscaled state Secret"); err != nil { - log.Printf("kubestore: error creating tailscaled state Event: %v", err) + s.logf("kubestore: error creating tailscaled state Event: %v", err) } } cancel() @@ -342,17 +345,72 @@ func (s *Store) loadState() (err error) { return ipn.ErrStateNotExist } if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateLoadFailed, err.Error()); err != nil { - log.Printf("kubestore: error creating Event: %v", err) + s.logf("kubestore: error creating Event: %v", err) } return err } if err := s.client.Event(ctx, eventTypeNormal, reasonTailscaleStateLoaded, "Successfully loaded tailscaled state from Secret"); err != nil { - log.Printf("kubestore: error creating Event: %v", err) + s.logf("kubestore: error creating Event: %v", err) } - s.memory.LoadFromMap(secret.Data) + data, err := s.maybeStripAttestationKeyFromProfile(secret.Data) + if err != nil { + return fmt.Errorf("error attempting to strip attestation data from state Secret: %w", err) + } + s.memory.LoadFromMap(data) return nil } +// maybeStripAttestationKeyFromProfile removes the hardware attestation key +// field from serialized Tailscale profile. This is done to recover from a bug +// introduced in 1.92, where node-bound hardware attestation keys were added to +// Tailscale states stored in Kubernetes Secrets. +// See https://github.com/tailscale/tailscale/issues/18302 +// TODO(irbekrm): it would be good if we could somehow determine when we no +// longer need to run this check. +func (s *Store) maybeStripAttestationKeyFromProfile(data map[string][]byte) (map[string][]byte, error) { + prefsKey := extractPrefsKey(data) + prefsBytes, ok := data[prefsKey] + if !ok { + return data, nil + } + var prefs map[string]any + if err := json.Unmarshal(prefsBytes, &prefs); err != nil { + s.logf("[unexpected]: kube store: failed to unmarshal prefs data") + // don't error as in most cases the state won't have the attestation key + return data, nil + } + + config, ok := prefs["Config"].(map[string]any) + if !ok { + return data, nil + } + if _, hasKey := config["AttestationKey"]; !hasKey { + return data, nil + } + s.logf("kube store: found redundant attestation key, deleting") + delete(config, "AttestationKey") + prefsBytes, err := json.Marshal(prefs) + if err != nil { + return nil, fmt.Errorf("[unexpected] kube store: failed to marshal profile after removing attestation key: %v", err) + } + data[prefsKey] = prefsBytes + if err := s.updateSecret(map[string][]byte{prefsKey: prefsBytes}, s.secretName); err != nil { + // don't error out - this might have been a temporary kube API server + // connection issue. The key will be removed from the in-memory cache + // and we'll retry updating the Secret on the next restart. + s.logf("kube store: error updating Secret after stripping AttestationKey: %v", err) + } + return data, nil +} + +const currentProfileKey = "_current-profile" + +// extractPrefs returns the key at which Tailscale prefs are stored in the +// provided Secret data. +func extractPrefsKey(data map[string][]byte) string { + return string(data[currentProfileKey]) +} + // runCertReload relists and reloads all TLS certs for endpoints shared by this // node from Secrets other than the state Secret to ensure that renewed certs get eventually loaded. // It is not critical to reload a cert immediately after @@ -361,7 +419,7 @@ func (s *Store) loadState() (err error) { // Note that if shared certs are not found in memory on an HTTPS request, we // do a Secret lookup, so this mechanism does not need to ensure that newly // added Ingresses' certs get loaded. -func (s *Store) runCertReload(ctx context.Context, logf logger.Logf) { +func (s *Store) runCertReload(ctx context.Context) { ticker := time.NewTicker(time.Hour * 24) defer ticker.Stop() for { @@ -371,7 +429,7 @@ func (s *Store) runCertReload(ctx context.Context, logf logger.Logf) { case <-ticker.C: sel := s.certSecretSelector() if err := s.loadCerts(ctx, sel); err != nil { - logf("[unexpected] error reloading TLS certs: %v", err) + s.logf("[unexpected] error reloading TLS certs: %v", err) } } } diff --git a/ipn/store/kubestore/store_kube_test.go b/ipn/store/kubestore/store_kube_test.go index 8c8e5e870..44a4bbb7f 100644 --- a/ipn/store/kubestore/store_kube_test.go +++ b/ipn/store/kubestore/store_kube_test.go @@ -20,6 +20,90 @@ import ( "tailscale.com/kube/kubetypes" ) +func TestKubernetesPodMigrationWithTPMAttestationKey(t *testing.T) { + stateWithAttestationKey := `{ + "Config": { + "NodeID": "nSTABLE123456", + "AttestationKey": { + "tpmPrivate": "c2Vuc2l0aXZlLXRwbS1kYXRhLXRoYXQtb25seS13b3Jrcy1vbi1vcmlnaW5hbC1ub2Rl", + "tpmPublic": "cHVibGljLXRwbS1kYXRhLWZvci1hdHRlc3RhdGlvbi1rZXk=" + } + } + }` + + secretData := map[string][]byte{ + "profile-abc123": []byte(stateWithAttestationKey), + "_current-profile": []byte("profile-abc123"), + } + + client := &kubeclient.FakeClient{ + GetSecretImpl: func(ctx context.Context, name string) (*kubeapi.Secret, error) { + return &kubeapi.Secret{Data: secretData}, nil + }, + CheckSecretPermissionsImpl: func(ctx context.Context, name string) (bool, bool, error) { + return true, true, nil + }, + JSONPatchResourceImpl: func(ctx context.Context, name, resourceType string, patches []kubeclient.JSONPatch) error { + for _, p := range patches { + if p.Op == "add" && p.Path == "/data" { + secretData = p.Value.(map[string][]byte) + } + } + return nil + }, + } + + store := &Store{ + client: client, + canPatch: true, + secretName: "ts-state", + memory: mem.Store{}, + logf: t.Logf, + } + + if err := store.loadState(); err != nil { + t.Fatalf("loadState failed: %v", err) + } + + // Verify we can read the state from the store + stateBytes, err := store.ReadState("profile-abc123") + if err != nil { + t.Fatalf("ReadState failed: %v", err) + } + + // The state should be readable as JSON + var state map[string]json.RawMessage + if err := json.Unmarshal(stateBytes, &state); err != nil { + t.Fatalf("failed to unmarshal state: %v", err) + } + + // Verify the Config field exists + configRaw, ok := state["Config"] + if !ok { + t.Fatal("Config field not found in state") + } + + // Parse the Config to verify fields are preserved + var config map[string]json.RawMessage + if err := json.Unmarshal(configRaw, &config); err != nil { + t.Fatalf("failed to unmarshal Config: %v", err) + } + + // The AttestationKey should be stripped by the kubestore + if _, hasAttestation := config["AttestationKey"]; hasAttestation { + t.Error("AttestationKey should be stripped from state loaded by kubestore") + } + + // Verify other fields are preserved + var nodeID string + if err := json.Unmarshal(config["NodeID"], &nodeID); err != nil { + t.Fatalf("failed to unmarshal NodeID: %v", err) + } + if nodeID != "nSTABLE123456" { + t.Errorf("NodeID mismatch: got %q, want %q", nodeID, "nSTABLE123456") + } +} + func TestWriteState(t *testing.T) { tests := []struct { name string