tailscale/kube/state/state.go
Tom Meadows 5eb0b4be31
cmd/containerboot,cmd/k8s-proxy,kube: add authkey renewal to k8s-proxy (#19221)
* kube/authkey,cmd/containerboot: extract shared auth key reissue package

Move auth key reissue logic (set marker, wait for new key, clear marker,
read config) into a shared kube/authkey package and update containerboot
to use it. No behaviour change.

Updates #14080

Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk>

* kube/authkey,kube/state,cmd/containerboot: preserve device_id across restarts

Stop clearing device_id, device_fqdn, and device_ips from state on startup.
These keys are now preserved across restarts so the operator can track
device identity. Expand ClearReissueAuthKey to clear device state and
tailscaled profile data when performing a full auth key reissue.

Updates #14080

Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk>

* cmd/containerboot: use root context for auth key reissue wait

Pass the root context instead of bootCtx to setAndWaitForAuthKeyReissue.
The 60-second bootCtx timeout was cancelling the reissue wait before the
operator had time to respond, causing the pod to crash-loop.

Updates #14080

Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk>

* cmd/k8s-proxy: add auth key renewal support

Add auth key reissue handling to k8s-proxy, mirroring containerboot.
When the proxy detects an auth failure (login-state health warning or
NeedsLogin state), it disconnects from control, signals the operator
via the state Secret, waits for a new key, clears stale state, and
exits so Kubernetes restarts the pod with the new key.

A health watcher goroutine runs alongside ts.Up() to short-circuit
the startup timeout on terminal auth failures.

Updates #14080

Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk>

---------

Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk>
2026-04-15 16:13:46 +01:00

97 lines
3.1 KiB
Go

// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
//go:build !plan9
// Package state updates state keys for tailnet client devices managed by the
// operator. These keys are used to signal readiness, metadata, and current
// configuration state to the operator. Client packages deployed by the operator
// include containerboot, tsrecorder, and k8s-proxy, but currently containerboot
// has its own implementation to manage the same keys.
package state
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"

	"tailscale.com/ipn"
	"tailscale.com/kube/kubetypes"
	klc "tailscale.com/kube/localclient"
	"tailscale.com/tailcfg"
	"tailscale.com/util/deephash"
)
// State store keys written by this package. Each one is the corresponding
// kubetypes key name converted to an ipn.StateKey so it can be passed straight
// to ipn.StateStore.WriteState.
const (
// keyPodUID is written by SetInitialKeys with the Pod UID.
keyPodUID = ipn.StateKey(kubetypes.KeyPodUID)
// keyCapVer is written by SetInitialKeys with tailcfg.CurrentCapabilityVersion
// formatted as a decimal string.
keyCapVer = ipn.StateKey(kubetypes.KeyCapVer)
// keyDeviceID is written by KeepKeysUpdated with the self node's stable ID.
keyDeviceID = ipn.StateKey(kubetypes.KeyDeviceID)
// keyDeviceIPs is written by KeepKeysUpdated with a JSON array of the self
// node's addresses.
keyDeviceIPs = ipn.StateKey(kubetypes.KeyDeviceIPs)
// keyDeviceFQDN is written by KeepKeysUpdated with the self node's name.
keyDeviceFQDN = ipn.StateKey(kubetypes.KeyDeviceFQDN)
)
// SetInitialKeys records the Pod UID and the current capability version in
// store. The Pod UID is written first; if that write fails, the capability
// version is not written. Any write failure is returned wrapped with a
// description of which key could not be stored.
func SetInitialKeys(store ipn.StateStore, podUID string) error {
	err := store.WriteState(keyPodUID, []byte(podUID))
	if err != nil {
		return fmt.Errorf("error writing pod UID to state store: %w", err)
	}

	// The capability version is stored as its decimal string representation.
	capVer := fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion)
	err = store.WriteState(keyCapVer, capVer)
	if err != nil {
		return fmt.Errorf("error writing capability version to state store: %w", err)
	}

	return nil
}
// KeepKeysUpdated sets state store keys consistent with containerboot to
// signal proxy readiness to the operator.
//
// It opens its own IPN bus watcher on lc, subscribed with
// ipn.NotifyInitialNetMap, and for every notification that carries a netmap
// writes the self node's stable ID, name (FQDN), and JSON-encoded IP addresses
// to store. Each key is only rewritten when its value has changed since the
// previous write made by this call.
//
// It runs until the watcher fails. If the failure is caused by ctx being done
// it returns nil; otherwise it returns the watcher error. It also returns
// early with a wrapped error if the watcher cannot be opened or a write to
// store fails.
func KeepKeysUpdated(ctx context.Context, store ipn.StateStore, lc klc.LocalClient) error {
	w, err := lc.WatchIPNBus(ctx, ipn.NotifyInitialNetMap)
	if err != nil {
		return fmt.Errorf("error watching IPN bus: %w", err)
	}
	defer w.Close()

	// Hashes of the last values written, used to skip redundant writes.
	var currentDeviceID, currentDeviceIPs, currentDeviceFQDN deephash.Sum
	for {
		n, err := w.Next() // Blocks on a streaming LocalAPI HTTP call.
		if err != nil {
			// Treat context cancellation as a clean shutdown. errors.Is is
			// used so that a wrapped context error is recognised as well as
			// the bare one.
			if cerr := ctx.Err(); cerr != nil && errors.Is(err, cerr) {
				return nil
			}
			return err
		}
		if n.NetMap == nil {
			// Not every notification carries a netmap; nothing to record.
			continue
		}

		if deviceID := n.NetMap.SelfNode.StableID(); deephash.Update(&currentDeviceID, &deviceID) {
			if err := store.WriteState(keyDeviceID, []byte(deviceID)); err != nil {
				return fmt.Errorf("failed to store device ID in state: %w", err)
			}
		}

		if fqdn := n.NetMap.SelfNode.Name(); deephash.Update(&currentDeviceFQDN, &fqdn) {
			if err := store.WriteState(keyDeviceFQDN, []byte(fqdn)); err != nil {
				return fmt.Errorf("failed to store device FQDN in state: %w", err)
			}
		}

		if addrs := n.NetMap.SelfNode.Addresses(); deephash.Update(&currentDeviceIPs, &addrs) {
			// Only the address part of each prefix is stored. The slice is
			// deliberately left nil when there are no addresses so the
			// encoded value is unchanged from previous behaviour.
			var deviceIPs []string
			for _, addr := range addrs.AsSlice() {
				deviceIPs = append(deviceIPs, addr.Addr().String())
			}
			deviceIPsValue, err := json.Marshal(deviceIPs)
			if err != nil {
				return fmt.Errorf("failed to marshal device IPs: %w", err)
			}
			if err := store.WriteState(keyDeviceIPs, deviceIPsValue); err != nil {
				return fmt.Errorf("failed to store device IPs in state: %w", err)
			}
		}
	}
}