From f2bcfe5e749db420d678b822616626af0bb7abd4 Mon Sep 17 00:00:00 2001 From: chaosinthecrd Date: Tue, 27 Jan 2026 10:23:02 +0000 Subject: [PATCH] cmd/containerboot,kube/services: support the ability to automatically advertise services on startup Updates #17769 Signed-off-by: chaosinthecrd --- cmd/containerboot/main.go | 25 +++++- cmd/containerboot/main_test.go | 70 ++++++++++++--- cmd/containerboot/serve.go | 103 +++++++++++++++------- cmd/containerboot/serve_test.go | 125 ++++++++++++++++++++++++--- cmd/containerboot/settings.go | 7 +- cmd/k8s-operator/proxygroup_specs.go | 4 + cmd/k8s-operator/sts.go | 4 + cmd/k8s-operator/testutils_test.go | 2 + kube/localclient/fake-client.go | 23 +++++ kube/localclient/local-client.go | 10 +++ kube/services/services.go | 37 ++++++++ 11 files changed, 347 insertions(+), 63 deletions(-) diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index a520b5756..dc56f2b21 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -101,6 +101,10 @@ // cluster using the same hostname (in this case, the MagicDNS name of the ingress proxy) // as a non-cluster workload on tailnet. // This is only meant to be configured by the Kubernetes operator. +// - TS_EXPERIMENTAL_SERVICE_AUTO_ADVERTISEMENT: If set to true and if this +// containerboot instance is not running in Kubernetes, autoadvertise any services +// defined in the devices serve config, and unadvertise on shutdown. Defaults +// to `true`, but can be disabled to allow user specific advertisement configuration. // // When running on Kubernetes, containerboot defaults to storing state in the // "tailscale" kube secret. To store state on local disk instead, set @@ -137,6 +141,7 @@ import ( kubeutils "tailscale.com/k8s-operator" healthz "tailscale.com/kube/health" "tailscale.com/kube/kubetypes" + klc "tailscale.com/kube/localclient" "tailscale.com/kube/metrics" "tailscale.com/kube/services" "tailscale.com/tailcfg" @@ -155,6 +160,10 @@ func newNetfilterRunner(logf logger.Logf) (linuxfw.NetfilterRunner, error) { return linuxfw.New(logf, "") } +func getAutoAdvertiseBool() bool { + return defaultBool("TS_EXPERIMENTAL_SERVICE_AUTO_ADVERTISEMENT", true) +} + func main() { if err := run(); err != nil && !errors.Is(err, context.Canceled) { log.Fatal(err) @@ -229,6 +238,7 @@ func run() error { ctx, cancel := context.WithTimeout(context.Background(), 25*time.Second) defer cancel() + // we are shutting down, we always want to unadvertise here if err := services.EnsureServicesNotAdvertised(ctx, client, log.Printf); err != nil { log.Printf("Error ensuring services are not advertised: %v", err) } @@ -652,9 +662,22 @@ runLoop: healthCheck.Update(len(addrs) != 0) } + var prevServeConfig *ipn.ServeConfig + if getAutoAdvertiseBool() { + prevServeConfig, err = client.GetServeConfig(ctx) + if err != nil { + return fmt.Errorf("autoadvertisement: failed to get serve config: %w", err) + } + + err = refreshAdvertiseServices(ctx, prevServeConfig, klc.New(client)) + if err != nil { + return fmt.Errorf("autoadvertisement: failed to refresh advertise services: %w", err) + } + } + if cfg.ServeConfigPath != "" { triggerWatchServeConfigChanges.Do(func() { - go watchServeConfigChanges(ctx, certDomainChanged, certDomain, client, kc, cfg) + go watchServeConfigChanges(ctx, certDomainChanged, certDomain, client, kc, cfg, prevServeConfig) }) } diff --git a/cmd/containerboot/main_test.go b/cmd/containerboot/main_test.go index 7007cc152..11e6e4581 100644 --- a/cmd/containerboot/main_test.go +++ b/cmd/containerboot/main_test.go @@ -1009,6 +1009,25 @@ func TestContainerBoot(t *testing.T) { }, } }, + "serve_config_with_service_auto_advertisement": func(env *testEnv) testCase { + return testCase{ + Env: map[string]string{ + "TS_SERVE_CONFIG": filepath.Join(env.d, "etc/tailscaled/serve-config-with-services.json"), + "TS_AUTHKEY": "tskey-key", + }, + Phases: []phase{ + { + WantCmds: []string{ + "/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=mem: --statedir=/tmp --tun=userspace-networking", + "/usr/bin/tailscale --socket=/tmp/tailscaled.sock up --accept-dns=false --authkey=tskey-key", + }, + }, + { + Notify: runningNotify, + }, + }, + } + }, "kube_shutdown_during_state_write": func(env *testEnv) testCase { return testCase{ Env: map[string]string{ @@ -1159,7 +1178,7 @@ func TestContainerBoot(t *testing.T) { return nil }) if err != nil { - t.Fatalf("phase %d: %v", i, err) + t.Fatalf("test: %q phase %d: %v", name, i, err) } err = tstest.WaitFor(2*time.Second, func() error { for path, want := range p.WantFiles { @@ -1340,10 +1359,16 @@ func (lc *localAPI) Notify(n *ipn.Notify) { func (lc *localAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { case "/localapi/v0/serve-config": - if r.Method != "POST" { + switch r.Method { + case "GET": + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(&ipn.ServeConfig{}) + return + case "POST": + return + default: panic(fmt.Sprintf("unsupported method %q", r.Method)) } - return case "/localapi/v0/watch-ipn-bus": if r.Method != "GET" { panic(fmt.Sprintf("unsupported method %q", r.Method)) @@ -1355,10 +1380,19 @@ func (lc *localAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) { w.Write([]byte("fake metrics")) return case "/localapi/v0/prefs": - if r.Method != "GET" { + switch r.Method { + case "GET": + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(&ipn.Prefs{}) + return + case "PATCH": + // EditPrefs - just return empty prefs + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(&ipn.Prefs{}) + return + default: panic(fmt.Sprintf("unsupported method %q", r.Method)) } - return default: panic(fmt.Sprintf("unsupported path %q", r.URL.Path)) } @@ -1635,6 +1669,13 @@ func newTestEnv(t *testing.T) testEnv { tailscaledConf := &ipn.ConfigVAlpha{AuthKey: ptr.To("foo"), Version: "alpha0"} serveConf := ipn.ServeConfig{TCP: map[uint16]*ipn.TCPPortHandler{80: {HTTP: true}}} + serveConfWithServices := ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{80: {HTTP: true}}, + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:test-service-1": {}, + "svc:test-service-2": {}, + }, + } egressCfg := egressSvcConfig("foo", "foo.tailnetxyz.ts.net") dirs := []string{ @@ -1652,15 +1693,16 @@ func newTestEnv(t *testing.T) testEnv { } } files := map[string][]byte{ - "usr/bin/tailscaled": fakeTailscaled, - "usr/bin/tailscale": fakeTailscale, - "usr/bin/iptables": fakeTailscale, - "usr/bin/ip6tables": fakeTailscale, - "dev/net/tun": []byte(""), - "proc/sys/net/ipv4/ip_forward": []byte("0"), - "proc/sys/net/ipv6/conf/all/forwarding": []byte("0"), - "etc/tailscaled/cap-95.hujson": mustJSON(t, tailscaledConf), - "etc/tailscaled/serve-config.json": mustJSON(t, serveConf), + "usr/bin/tailscaled": fakeTailscaled, + "usr/bin/tailscale": fakeTailscale, + "usr/bin/iptables": fakeTailscale, + "usr/bin/ip6tables": fakeTailscale, + "dev/net/tun": []byte(""), + "proc/sys/net/ipv4/ip_forward": []byte("0"), + "proc/sys/net/ipv6/conf/all/forwarding": []byte("0"), + "etc/tailscaled/cap-95.hujson": mustJSON(t, tailscaledConf), + "etc/tailscaled/serve-config.json": mustJSON(t, serveConf), + "etc/tailscaled/serve-config-with-services.json": mustJSON(t, serveConfWithServices), filepath.Join("etc/tailscaled/", egressservices.KeyEgressServices): mustJSON(t, egressCfg), filepath.Join("etc/tailscaled/", egressservices.KeyHEPPings): []byte("4"), } diff --git a/cmd/containerboot/serve.go b/cmd/containerboot/serve.go index 5fa8e580d..1a1f760d1 100644 --- a/cmd/containerboot/serve.go +++ b/cmd/containerboot/serve.go @@ -9,6 +9,7 @@ import ( "bytes" "context" "encoding/json" + "fmt" "log" "os" "path/filepath" @@ -22,6 +23,7 @@ import ( "tailscale.com/kube/certs" "tailscale.com/kube/kubetypes" klc "tailscale.com/kube/localclient" + "tailscale.com/kube/services" "tailscale.com/types/netmap" ) @@ -29,8 +31,9 @@ import ( // the serve config from it, replacing ${TS_CERT_DOMAIN} with certDomain, and // applies it to lc. It exits when ctx is canceled. cdChanged is a channel that // is written to when the certDomain changes, causing the serve config to be -// re-read and applied. -func watchServeConfigChanges(ctx context.Context, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *local.Client, kc *kubeClient, cfg *settings) { +// re-read and applied. prevServeConfig is the serve config that was fetched +// during startup. This will be refreshed by the goroutine when serve config changes. +func watchServeConfigChanges(ctx context.Context, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *local.Client, kc *kubeClient, cfg *settings, prevServeConfig *ipn.ServeConfig) { if certDomainAtomic == nil { panic("certDomainAtomic must not be nil") } @@ -53,11 +56,18 @@ func watchServeConfigChanges(ctx context.Context, cdChanged <-chan bool, certDom } var certDomain string - var prevServeConfig *ipn.ServeConfig var cm *certs.CertManager if cfg.CertShareMode == "rw" { cm = certs.NewCertManager(klc.New(lc), log.Printf) } + + var err error + if prevServeConfig == nil { + prevServeConfig, err = lc.GetServeConfig(ctx) + if err != nil { + log.Fatalf("serve proxy: failed to get serve config: %v", err) + } + } for { select { case <-ctx.Done(): @@ -70,35 +80,68 @@ func watchServeConfigChanges(ctx context.Context, cdChanged <-chan bool, certDom // k8s handles these mounts. So just re-read the file and apply it // if it's changed. } - sc, err := readServeConfig(cfg.ServeConfigPath, certDomain) - if err != nil { - log.Fatalf("serve proxy: failed to read serve config: %v", err) + + var sc *ipn.ServeConfig + if cfg.ServeConfigPath != "" { + sc, err := readServeConfig(cfg.ServeConfigPath, certDomain) + if err != nil { + log.Fatalf("serve proxy: failed to read serve config: %v", err) + } + if sc == nil { + log.Printf("serve proxy: no serve config at %q, skipping", cfg.ServeConfigPath) + continue + } + if prevServeConfig != nil && reflect.DeepEqual(sc, prevServeConfig) { + continue + } + if err := updateServeConfig(ctx, sc, certDomain, klc.New(lc)); err != nil { + log.Fatalf("serve proxy: error updating serve config: %v", err) + } + if kc != nil && kc.canPatch { + if err := kc.storeHTTPSEndpoint(ctx, certDomain); err != nil { + log.Fatalf("serve proxy: error storing HTTPS endpoint: %v", err) + } + } + prevServeConfig = sc + if cfg.CertShareMode != "rw" { + continue + } + if err := cm.EnsureCertLoops(ctx, sc); err != nil { + log.Fatalf("serve proxy: error ensuring cert loops: %v", err) + } + } else { + log.Printf("serve config path not provided.") + sc = prevServeConfig } - if sc == nil { - log.Printf("serve proxy: no serve config at %q, skipping", cfg.ServeConfigPath) - continue - } - if prevServeConfig != nil && reflect.DeepEqual(sc, prevServeConfig) { - continue - } - if err := updateServeConfig(ctx, sc, certDomain, lc); err != nil { - log.Fatalf("serve proxy: error updating serve config: %v", err) - } - if kc != nil && kc.canPatch { - if err := kc.storeHTTPSEndpoint(ctx, certDomain); err != nil { - log.Fatalf("serve proxy: error storing HTTPS endpoint: %v", err) + + // if we are running in kubernetes, we want to leave advertisement to the operator + // to do (by updating the serve config) + if getAutoAdvertiseBool() { + if err := refreshAdvertiseServices(ctx, sc, klc.New(lc)); err != nil { + log.Fatalf("error refreshing advertised services: %v", err) } } - prevServeConfig = sc - if cfg.CertShareMode != "rw" { - continue - } - if err := cm.EnsureCertLoops(ctx, sc); err != nil { - log.Fatalf("serve proxy: error ensuring cert loops: %v", err) - } } } +func refreshAdvertiseServices(ctx context.Context, sc *ipn.ServeConfig, lc klc.LocalClient) error { + if sc == nil || len(sc.Services) == 0 { + return nil + } + + var svcs []string + for svc := range sc.Services { + svcs = append(svcs, svc.String()) + } + + err := services.EnsureServicesAdvertised(ctx, svcs, lc, log.Printf) + if err != nil { + return fmt.Errorf("failed to ensure services advertised: %w", err) + } + + return nil +} + func certDomainFromNetmap(nm *netmap.NetworkMap) string { if len(nm.DNS.CertDomains) == 0 { return "" @@ -106,13 +149,7 @@ func certDomainFromNetmap(nm *netmap.NetworkMap) string { return nm.DNS.CertDomains[0] } -// localClient is a subset of [local.Client] that can be mocked for testing. -type localClient interface { - SetServeConfig(context.Context, *ipn.ServeConfig) error - CertPair(context.Context, string) ([]byte, []byte, error) -} - -func updateServeConfig(ctx context.Context, sc *ipn.ServeConfig, certDomain string, lc localClient) error { +func updateServeConfig(ctx context.Context, sc *ipn.ServeConfig, certDomain string, lc klc.LocalClient) error { if !isValidHTTPSConfig(certDomain, sc) { return nil } diff --git a/cmd/containerboot/serve_test.go b/cmd/containerboot/serve_test.go index fc18f254d..0165a9dfa 100644 --- a/cmd/containerboot/serve_test.go +++ b/cmd/containerboot/serve_test.go @@ -12,9 +12,10 @@ import ( "testing" "github.com/google/go-cmp/cmp" - "tailscale.com/client/local" "tailscale.com/ipn" "tailscale.com/kube/kubetypes" + "tailscale.com/kube/localclient" + "tailscale.com/tailcfg" ) func TestUpdateServeConfig(t *testing.T) { @@ -65,13 +66,13 @@ func TestUpdateServeConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - fakeLC := &fakeLocalClient{} + fakeLC := &localclient.FakeLocalClient{} err := updateServeConfig(context.Background(), tt.sc, tt.certDomain, fakeLC) if err != nil { t.Errorf("updateServeConfig() error = %v", err) } - if fakeLC.setServeCalled != tt.wantCall { - t.Errorf("SetServeConfig() called = %v, want %v", fakeLC.setServeCalled, tt.wantCall) + if fakeLC.SetServeCalled != tt.wantCall { + t.Errorf("SetServeConfig() called = %v, want %v", fakeLC.SetServeCalled, tt.wantCall) } }) } @@ -196,18 +197,114 @@ func TestReadServeConfig(t *testing.T) { } } -type fakeLocalClient struct { - *local.Client - setServeCalled bool -} +func TestRefreshAdvertiseServices(t *testing.T) { + tests := []struct { + name string + sc *ipn.ServeConfig + wantServices []string + wantEditPrefsCalled bool + wantErr bool + }{ + { + name: "nil_serve_config", + sc: nil, + wantEditPrefsCalled: false, + }, + { + name: "empty_serve_config", + sc: &ipn.ServeConfig{}, + wantEditPrefsCalled: false, + }, + { + name: "no_services_defined", + sc: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {HTTP: true}, + }, + }, + wantEditPrefsCalled: false, + }, + { + name: "single_service", + sc: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:my-service": {}, + }, + }, + wantServices: []string{"svc:my-service"}, + wantEditPrefsCalled: true, + }, + { + name: "multiple_services", + sc: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:service-a": {}, + "svc:service-b": {}, + "svc:service-c": {}, + }, + }, + wantServices: []string{"svc:service-a", "svc:service-b", "svc:service-c"}, + wantEditPrefsCalled: true, + }, + { + name: "services_with_tcp_and_web", + sc: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {HTTP: true}, + }, + Web: map[ipn.HostPort]*ipn.WebServerConfig{ + "example.com:443": {}, + }, + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:frontend": {}, + "svc:backend": {}, + }, + }, + wantServices: []string{"svc:frontend", "svc:backend"}, + wantEditPrefsCalled: true, + }, + } -func (m *fakeLocalClient) SetServeConfig(ctx context.Context, cfg *ipn.ServeConfig) error { - m.setServeCalled = true - return nil -} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeLC := &localclient.FakeLocalClient{} + err := refreshAdvertiseServices(context.Background(), tt.sc, fakeLC) -func (m *fakeLocalClient) CertPair(ctx context.Context, domain string) (certPEM, keyPEM []byte, err error) { - return nil, nil, nil + if (err != nil) != tt.wantErr { + t.Errorf("refreshAdvertiseServices() error = %v, wantErr %v", err, tt.wantErr) + } + + if tt.wantEditPrefsCalled != (len(fakeLC.EditPrefsCalls) > 0) { + t.Errorf("EditPrefs called = %v, want %v", len(fakeLC.EditPrefsCalls) > 0, tt.wantEditPrefsCalled) + } + + if tt.wantEditPrefsCalled { + if len(fakeLC.EditPrefsCalls) != 1 { + t.Fatalf("expected 1 EditPrefs call, got %d", len(fakeLC.EditPrefsCalls)) + } + + mp := fakeLC.EditPrefsCalls[0] + if !mp.AdvertiseServicesSet { + t.Error("AdvertiseServicesSet should be true") + } + + if len(mp.AdvertiseServices) != len(tt.wantServices) { + t.Errorf("AdvertiseServices length = %d, want %d", len(mp.Prefs.AdvertiseServices), len(tt.wantServices)) + } + + advertised := make(map[string]bool) + for _, svc := range mp.AdvertiseServices { + advertised[svc] = true + } + + for _, want := range tt.wantServices { + if !advertised[want] { + t.Errorf("expected service %q to be advertised, but it wasn't", want) + } + } + } + }) + } } func TestHasHTTPSEndpoint(t *testing.T) { diff --git a/cmd/containerboot/settings.go b/cmd/containerboot/settings.go index aab2b8631..b9d41c33a 100644 --- a/cmd/containerboot/settings.go +++ b/cmd/containerboot/settings.go @@ -107,7 +107,12 @@ func configFromEnv() (*settings, error) { UserspaceMode: defaultBool("TS_USERSPACE", true), StateDir: defaultEnv("TS_STATE_DIR", ""), AcceptDNS: defaultEnvBoolPointer("TS_ACCEPT_DNS"), - KubeSecret: defaultEnv("TS_KUBE_SECRET", "tailscale"), + KubeSecret: func() string { + if os.Getenv("KUBERNETES_SERVICE_HOST") != "" { + return defaultEnv("TS_KUBE_SECRET", "tailscale") + } + return defaultEnv("TS_KUBE_SECRET", "") + }(), SOCKSProxyAddr: defaultEnv("TS_SOCKS5_SERVER", ""), HTTPProxyAddr: defaultEnv("TS_OUTBOUND_HTTP_PROXY_LISTEN", ""), Socket: defaultEnv("TS_SOCKET", "/tmp/tailscaled.sock"), diff --git a/cmd/k8s-operator/proxygroup_specs.go b/cmd/k8s-operator/proxygroup_specs.go index 930b7049d..efdce07ba 100644 --- a/cmd/k8s-operator/proxygroup_specs.go +++ b/cmd/k8s-operator/proxygroup_specs.go @@ -173,6 +173,10 @@ func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode string Name: "TS_KUBE_SECRET", Value: "$(POD_NAME)", }, + { + Name: "TS_EXPERIMENTAL_SERVICE_AUTO_ADVERTISEMENT", + Value: "false", + }, { // TODO(tomhjp): This is tsrecorder-specific and does nothing. Delete. Name: "TS_STATE", diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index 2919e535c..03d768229 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -692,6 +692,10 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S Name: "TS_KUBE_SECRET", Value: "$(POD_NAME)", }, + corev1.EnvVar{ + Name: "TS_EXPERIMENTAL_SERVICE_AUTO_ADVERTISEMENT", + Value: "false", + }, corev1.EnvVar{ Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)", diff --git a/cmd/k8s-operator/testutils_test.go b/cmd/k8s-operator/testutils_test.go index b0e2cfd73..1d87934c5 100644 --- a/cmd/k8s-operator/testutils_test.go +++ b/cmd/k8s-operator/testutils_test.go @@ -91,6 +91,7 @@ func expectedSTS(t *testing.T, cl client.Client, opts configOpts) *appsv1.Statef {Name: "POD_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{APIVersion: "", FieldPath: "metadata.name"}, ResourceFieldRef: nil, ConfigMapKeyRef: nil, SecretKeyRef: nil}}, {Name: "POD_UID", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{APIVersion: "", FieldPath: "metadata.uid"}, ResourceFieldRef: nil, ConfigMapKeyRef: nil, SecretKeyRef: nil}}, {Name: "TS_KUBE_SECRET", Value: "$(POD_NAME)"}, + {Name: "TS_EXPERIMENTAL_SERVICE_AUTO_ADVERTISEMENT", Value: "false"}, {Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)"}, {Name: "TS_DEBUG_ACME_FORCE_RENEWAL", Value: "true"}, }, @@ -287,6 +288,7 @@ func expectedSTSUserspace(t *testing.T, cl client.Client, opts configOpts) *apps {Name: "POD_NAME", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{APIVersion: "", FieldPath: "metadata.name"}, ResourceFieldRef: nil, ConfigMapKeyRef: nil, SecretKeyRef: nil}}, {Name: "POD_UID", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{APIVersion: "", FieldPath: "metadata.uid"}, ResourceFieldRef: nil, ConfigMapKeyRef: nil, SecretKeyRef: nil}}, {Name: "TS_KUBE_SECRET", Value: "$(POD_NAME)"}, + {Name: "TS_EXPERIMENTAL_SERVICE_AUTO_ADVERTISEMENT", Value: "false"}, {Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)"}, {Name: "TS_DEBUG_ACME_FORCE_RENEWAL", Value: "true"}, {Name: "TS_SERVE_CONFIG", Value: "/etc/tailscaled/$(POD_NAME)/serve-config"}, diff --git a/kube/localclient/fake-client.go b/kube/localclient/fake-client.go index 7f0a08316..90370a86b 100644 --- a/kube/localclient/fake-client.go +++ b/kube/localclient/fake-client.go @@ -12,6 +12,29 @@ import ( type FakeLocalClient struct { FakeIPNBusWatcher + SetServeCalled bool + EditPrefsCalls []*ipn.MaskedPrefs + GetPrefsResult *ipn.Prefs +} + +func (m *FakeLocalClient) SetServeConfig(ctx context.Context, cfg *ipn.ServeConfig) error { + m.SetServeCalled = true + return nil +} + +func (m *FakeLocalClient) EditPrefs(ctx context.Context, mp *ipn.MaskedPrefs) (*ipn.Prefs, error) { + m.EditPrefsCalls = append(m.EditPrefsCalls, mp) + if m.GetPrefsResult == nil { + return &ipn.Prefs{}, nil + } + return m.GetPrefsResult, nil +} + +func (m *FakeLocalClient) GetPrefs(ctx context.Context) (*ipn.Prefs, error) { + if m.GetPrefsResult == nil { + return &ipn.Prefs{}, nil + } + return m.GetPrefsResult, nil } func (f *FakeLocalClient) WatchIPNBus(ctx context.Context, mask ipn.NotifyWatchOpt) (IPNBusWatcher, error) { diff --git a/kube/localclient/local-client.go b/kube/localclient/local-client.go index 550b3ae74..052a3a600 100644 --- a/kube/localclient/local-client.go +++ b/kube/localclient/local-client.go @@ -17,6 +17,8 @@ import ( // for easier testing. type LocalClient interface { WatchIPNBus(ctx context.Context, mask ipn.NotifyWatchOpt) (IPNBusWatcher, error) + SetServeConfig(context.Context, *ipn.ServeConfig) error + EditPrefs(ctx context.Context, mp *ipn.MaskedPrefs) (*ipn.Prefs, error) CertIssuer } @@ -40,6 +42,14 @@ type localClient struct { lc *local.Client } +func (lc *localClient) SetServeConfig(ctx context.Context, config *ipn.ServeConfig) error { + return lc.lc.SetServeConfig(ctx, config) +} + +func (lc *localClient) EditPrefs(ctx context.Context, mp *ipn.MaskedPrefs) (*ipn.Prefs, error) { + return lc.lc.EditPrefs(ctx, mp) +} + func (lc *localClient) WatchIPNBus(ctx context.Context, mask ipn.NotifyWatchOpt) (IPNBusWatcher, error) { return lc.lc.WatchIPNBus(ctx, mask) } diff --git a/kube/services/services.go b/kube/services/services.go index a9e50975c..0189b1eda 100644 --- a/kube/services/services.go +++ b/kube/services/services.go @@ -12,9 +12,46 @@ import ( "tailscale.com/client/local" "tailscale.com/ipn" + "tailscale.com/kube/localclient" "tailscale.com/types/logger" ) +// EnsureServicesAdvertised is a function that gets called on containerboot +// startup and ensures that Services get advertised if they exist. +func EnsureServicesAdvertised(ctx context.Context, services []string, lc localclient.LocalClient, logf logger.Logf) error { + if _, err := lc.EditPrefs(ctx, &ipn.MaskedPrefs{ + AdvertiseServicesSet: true, + Prefs: ipn.Prefs{ + AdvertiseServices: services, + }, + }); err != nil { + // EditPrefs only returns an error if it fails _set_ its local prefs. + // If it fails to _persist_ the prefs in state, we don't get an error + // and we continue waiting below, as control will failover as usual. + return fmt.Errorf("error setting prefs AdvertiseServices: %w", err) + } + + // Services use the same (failover XOR regional routing) mechanism that + // HA subnet routers use. Unfortunately we don't yet get a reliable signal + // from control that it's responded to our unadvertisement, so the best we + // can do is wait for 20 seconds, where 15s is the approximate maximum time + // it should take for control to choose a new primary, and 5s is for buffer. + // + // Note: There is no guarantee that clients have been _informed_ of the new + // primary no matter how long we wait. We would need a mechanism to await + // netmap updates for peers to know for sure. + // + // See https://tailscale.com/kb/1115/high-availability for more details. + // TODO(tomhjp): Wait for a netmap update instead of sleeping when control + // supports that. + select { + case <-ctx.Done(): + return nil + case <-time.After(20 * time.Second): + return nil + } +} + // EnsureServicesNotAdvertised is a function that gets called on containerboot // or k8s-proxy termination and ensures that any currently advertised Services // get unadvertised to give clients time to switch to another node before this