From b239e92eb65d7fe102d247a4f69838c5e5e12f17 Mon Sep 17 00:00:00 2001 From: BeckyPauley <64131207+BeckyPauley@users.noreply.github.com> Date: Fri, 17 Apr 2026 17:03:25 +0100 Subject: [PATCH] cmd/k8s-operator: add e2e test setup and l7 ingress test for multi-tailnet (#19426) This change adds setup for a second tailnet to enable multi-tailnet e2e tests. When running against devcontrol, a second tailnet is created via the API. Otherwise, credentials are read from SECOND_TS_API_CLIENT_SECRET. Also adds an l7 HA Ingress test for multi-tailnet. Fixes tailscale/corp#37498 Signed-off-by: Becky Pauley --- cmd/k8s-operator/e2e/ingress_test.go | 132 ++++++++++++++++ cmd/k8s-operator/e2e/setup.go | 227 +++++++++++++++++++++++---- 2 files changed, 330 insertions(+), 29 deletions(-) diff --git a/cmd/k8s-operator/e2e/ingress_test.go b/cmd/k8s-operator/e2e/ingress_test.go index 4eb813a77..db12730e3 100644 --- a/cmd/k8s-operator/e2e/ingress_test.go +++ b/cmd/k8s-operator/e2e/ingress_test.go @@ -17,9 +17,11 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" + "tailscale.com/client/tailscale/v2" kube "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/kube/kubetypes" + "tailscale.com/tsnet" "tailscale.com/tstest" "tailscale.com/util/httpm" ) @@ -274,6 +276,86 @@ func TestL7HAIngress(t *testing.T) { } } +func TestL7HAIngressMultiTailnet(t *testing.T) { + if tnClient == nil || secondTNClient == nil { + t.Skip("TestL7HAMultiTailnet requires a working tailnet client for a first and second tailnet") + } + + // Apply nginx Deployment and Service. + createAndCleanup(t, kubeClient, nginxDeployment(ns, "nginx")) + createAndCleanup(t, kubeClient, &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nginx", + Namespace: ns, + }, + Spec: corev1.ServiceSpec{ + Selector: map[string]string{ + "app.kubernetes.io/name": "nginx", + }, + Ports: []corev1.ServicePort{ + { + Name: "http", + Port: 80, + }, + }, + }, + }) + + // Create Ingress ProxyGroup for each Tailnet. + firstTailnetPG := &tsapi.ProxyGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "first-tailnet", + }, + Spec: tsapi.ProxyGroupSpec{ + Type: tsapi.ProxyGroupTypeIngress, + }, + } + createAndCleanup(t, kubeClient, firstTailnetPG) + secondTailnetPG := &tsapi.ProxyGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "second-tailnet", + }, + Spec: tsapi.ProxyGroupSpec{ + Type: tsapi.ProxyGroupTypeIngress, + Tailnet: "second-tailnet", + }, + } + createAndCleanup(t, kubeClient, secondTailnetPG) + + if err := verifyProxyGroupTailnet(t, firstTailnetPG, tnClient); err != nil { + t.Fatalf("verifying ProxyGroup %s is registered to the correct tailnet: %v", firstTailnetPG.Name, err) + } + if err := verifyProxyGroupTailnet(t, secondTailnetPG, secondTNClient); err != nil { + t.Fatalf("verifying ProxyGroup %s is registered to the correct tailnet: %v", secondTailnetPG.Name, err) + } + + // Apply Ingress to expose nginx. + name := generateName("test-ingress") + ingress := l7Ingress(ns, name, map[string]string{ + "tailscale.com/proxy-group": "second-tailnet", + }) + createAndCleanup(t, kubeClient, ingress) + + // Check that the tailscale (VIP) Service has been created in the expected Tailnet. + svcName := "svc:" + name + if err := tstest.WaitFor(3*time.Minute, func() error { + _, err := secondTSClient.VIPServices().Get(t.Context(), svcName) + if tailscale.IsNotFound(err) { + return fmt.Errorf("Tailscale service %q not yet in expected tailnet", svcName) + } + return err + }); err != nil { + t.Fatalf("Tailscale service %q never appeared in expected tailnet: %v", svcName, err) + } + hostname, err := waitForIngressHostname(t, ns, name) + if err != nil { + t.Fatalf("error waiting for Ingress hostname: %v", err) + } + if err := testIngressIsReachable(t, newHTTPClient(secondTNClient), fmt.Sprintf("https://%s:443", hostname)); err != nil { + t.Fatal(err) + } +} + func l7Ingress(namespace, name string, annotations map[string]string) *networkingv1.Ingress { ingress := &networkingv1.Ingress{ ObjectMeta: metav1.ObjectMeta{ @@ -406,6 +488,56 @@ func testIngressIsReachable(t *testing.T, httpClient *http.Client, url string) e return nil } +// verifyProxyGroupTailnet verifies that a ProxyGroup is registered to the correct tailnet. +// This is done by getting the expected tailnet domain for the tailnet client, +// and comparing this with the actual device fqdn in the ProxyGroup state secret. +func verifyProxyGroupTailnet(t *testing.T, pg *tsapi.ProxyGroup, cl *tsnet.Server) error { + t.Helper() + // Determine the expected tailnet Magic DNS Name. + lc, err := cl.LocalClient() + if err != nil { + return err + } + status, err := lc.Status(t.Context()) + if err != nil { + return err + } + _, expectedTailnet, ok := strings.Cut(strings.TrimSuffix(status.Self.DNSName, "."), ".") + if !ok { + return fmt.Errorf("unexpected DNSName format %q", status.Self.DNSName) + } + // Read the device FQDN from the first state secret for the ProxyGroup, + // and verify that this matches the expected tailnet. + if err := tstest.WaitFor(3*time.Minute, func() error { + var secrets corev1.SecretList + if err := kubeClient.List(t.Context(), &secrets, + client.InNamespace("tailscale"), + client.MatchingLabels{ + kubetypes.LabelSecretType: kubetypes.LabelSecretTypeState, + "tailscale.com/parent-resource-type": "proxygroup", + "tailscale.com/parent-resource": pg.Name, + }, + ); err != nil { + return err + } + if len(secrets.Items) == 0 { + return fmt.Errorf("no state secrets found for ProxyGroup %q yet", pg.Name) + } + fqdn := strings.TrimSuffix(string(secrets.Items[0].Data[kubetypes.KeyDeviceFQDN]), ".") + _, tailnet, ok := strings.Cut(fqdn, ".") + if !ok { + return fmt.Errorf("ProxyGroup %q: device FQDN %q has no domain yet", pg.Name, fqdn) + } + if tailnet != expectedTailnet { + return fmt.Errorf("ProxyGroup %q on wrong tailnet: got domain %q, want %q", pg.Name, tailnet, expectedTailnet) + } + return nil + }); err != nil { + return fmt.Errorf("ProxyGroup %q not on expected tailnet: %v", pg.Name, err) + } + return nil +} + func waitForIngressHostname(t *testing.T, namespace, name string) (string, error) { t.Helper() var hostname string diff --git a/cmd/k8s-operator/e2e/setup.go b/cmd/k8s-operator/e2e/setup.go index 55d4652d8..0d4ca80ad 100644 --- a/cmd/k8s-operator/e2e/setup.go +++ b/cmd/k8s-operator/e2e/setup.go @@ -4,6 +4,7 @@ package e2e import ( + "bytes" "context" "crypto/rand" "crypto/tls" @@ -39,6 +40,7 @@ import ( "helm.sh/helm/v3/pkg/release" "helm.sh/helm/v3/pkg/storage/driver" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/rest" @@ -69,10 +71,13 @@ const ( ) var ( - tsClient *tailscale.Client // For API calls to control. - tnClient *tsnet.Server // For testing real tailnet traffic. - restCfg *rest.Config // For constructing a client-go client if necessary. - kubeClient client.WithWatch // For k8s API calls. + tsClient *tailscale.Client // For API calls to control. + tnClient *tsnet.Server // For testing real tailnet traffic on first tailnet. + secondTSClient *tailscale.Client // For API calls to the secondary tailnet (_second_tailnet). + secondTNClient *tsnet.Server // For testing real tailnet traffic on second tailnet. + restCfg *rest.Config // For constructing a client-go client if necessary. + kubeClient client.WithWatch // For k8s API calls. + clusterLoginServer string //go:embed certs/pebble.minica.crt pebbleMiniCACert []byte @@ -157,11 +162,11 @@ func runTests(m *testing.M) (int, error) { } var ( - clusterLoginServer string // Login server from cluster Pod point of view. - clientID, clientSecret string // OAuth client for the operator to use. + clientID, clientSecret string // OAuth client for the first tailnet (for the operator to use). caPaths []string // Extra CA cert file paths to add to images. - certsDir = filepath.Join(tmp, "certs") // Directory containing extra CA certs to add to images. + certsDir = filepath.Join(tmp, "certs") // Directory containing extra CA certs to add to images. + secondClientID, secondClientSecret string // OAuth client for the second tailnet (for the operator to use). ) if *fDevcontrol { // Deploy pebble and get its certs. @@ -279,7 +284,7 @@ func runTests(m *testing.M) (int, error) { return 0, fmt.Errorf("failed to set policy file: %w", err) } - logger.Infof("ACLs configured") + logger.Info("ACLs configured for first tailnet") key, err := tsClient.Keys().CreateOAuthClient(ctx, tailscale.CreateOAuthClientRequest{ Scopes: []string{"auth_keys", "devices:core", "services"}, @@ -287,36 +292,77 @@ func runTests(m *testing.M) (int, error) { Description: "k8s-operator client for e2e tests", }) if err != nil { - return 0, fmt.Errorf("failed to marshal OAuth client creation request: %w", err) + return 0, fmt.Errorf("failed to create OAuth client for first tailnet: %w", err) } - clientID = key.ID clientSecret = key.Key + + logger.Info("OAuth credentials set for first tailnet") + + // Create second tailnet. The bootstrap credentials returned have 'all' permissions- + // they are used for administrative actions and to create a separately scoped + // Oauth client for the k8s operator. + bootstrapClient, err := createTailnet(ctx, tsClient) + if err != nil { + return 0, fmt.Errorf("failed to create second tailnet: %w", err) + } + + // Set HTTPS on second tailnet. + err = bootstrapClient.TailnetSettings().Update(ctx, tailscale.UpdateTailnetSettingsRequest{HTTPSEnabled: new(true)}) + if err != nil { + return 0, fmt.Errorf("failed to configure https for second tailnet: %w", err) + } + logger.Info("HTTPS settings configured for second tailnet") + + // Set ACLs for second tailnet. + if err = bootstrapClient.PolicyFile().Set(ctx, string(requiredACLs), ""); err != nil { + return 0, fmt.Errorf("failed to set policy file: %w", err) + } + + logger.Info("ACLs configured for second tailnet") + + // Create an OAuth client for the second tailnet to be used + // by the k8s-operator. + secondKey, err := bootstrapClient.Keys().CreateOAuthClient(ctx, tailscale.CreateOAuthClientRequest{ + Scopes: []string{"auth_keys", "devices:core", "services"}, + Tags: []string{"tag:k8s-operator"}, + Description: "k8s-operator client for e2e tests", + }) + if err != nil { + return 0, fmt.Errorf("failed to create OAuth client for second tailnet: %w", err) + } + secondClientID = secondKey.ID + secondClientSecret = secondKey.Key + + secondTSClient, err = tailscaleClientFromSecret(ctx, "http://localhost:31544", secondClientID, secondClientSecret) + if err != nil { + return 0, fmt.Errorf("failed to set up second tailnet client: %w", err) + } + } else { clientSecret = os.Getenv("TS_API_CLIENT_SECRET") if clientSecret == "" { return 0, fmt.Errorf("must use --devcontrol or set TS_API_CLIENT_SECRET to an OAuth client suitable for the operator") } - // Format is "tskey-client--". - parts := strings.Split(clientSecret, "-") - if len(parts) != 4 { - return 0, fmt.Errorf("TS_API_CLIENT_SECRET is not valid") - } - clientID = parts[2] - credentials := clientcredentials.Config{ - ClientID: clientID, - ClientSecret: clientSecret, - TokenURL: fmt.Sprintf("%s/api/v2/oauth/token", ipn.DefaultControlURL), - Scopes: []string{"auth_keys"}, - } - tk, err := credentials.Token(ctx) + clientID, err = clientIDFromSecret(clientSecret) if err != nil { - return 0, fmt.Errorf("failed to get OAuth token: %w", err) + return 0, fmt.Errorf("failed to get client id from secret: %w", err) } - // An access token will last for an hour which is plenty of time for - // the tests to run. No need for token refresh logic. - tsClient = &tailscale.Client{ - APIKey: tk.AccessToken, + tsClient, err = tailscaleClientFromSecret(ctx, ipn.DefaultControlURL, clientID, clientSecret) + if err != nil { + return 0, fmt.Errorf("failed to set up first tailnet client: %w", err) + } + secondClientSecret = os.Getenv("SECOND_TS_API_CLIENT_SECRET") + if secondClientSecret == "" { + return 0, fmt.Errorf("must use --devcontrol or set SECOND_TS_API_CLIENT_SECRET to an OAuth client suitable for the operator") + } + secondClientID, err = clientIDFromSecret(secondClientSecret) + if err != nil { + return 0, fmt.Errorf("failed to get client id from secret: %w", err) + } + secondTSClient, err = tailscaleClientFromSecret(ctx, ipn.DefaultControlURL, secondClientID, secondClientSecret) + if err != nil { + return 0, fmt.Errorf("failed to set up second tailnet client: %w", err) } } @@ -446,10 +492,16 @@ func runTests(m *testing.M) (int, error) { authKey, err := tsClient.Keys().CreateAuthKey(ctx, tailscale.CreateKeyRequest{Capabilities: caps}) if err != nil { - return 0, err + return 0, fmt.Errorf("failed to create auth key for first tailnet: %w", err) } defer tsClient.Keys().Delete(context.Background(), authKey.ID) + secondAuthKey, err := secondTSClient.Keys().CreateAuthKey(ctx, tailscale.CreateKeyRequest{Capabilities: caps}) + if err != nil { + return 0, fmt.Errorf("failed to create auth key for second tailnet: %w", err) + } + defer secondTSClient.Keys().Delete(context.Background(), secondAuthKey.ID) + tnClient = &tsnet.Server{ ControlURL: tsClient.BaseURL.String(), Hostname: "test-proxy", @@ -463,9 +515,64 @@ func runTests(m *testing.M) (int, error) { } defer tnClient.Close() + secondTNClient = &tsnet.Server{ + ControlURL: secondTSClient.BaseURL.String(), + Hostname: "test-proxy", + Ephemeral: true, + Store: &mem.Store{}, + AuthKey: secondAuthKey.Key, + } + _, err = secondTNClient.Up(ctx) + if err != nil { + return 0, err + } + defer secondTNClient.Close() + + // Create the tailnet Secret in the tailscale namespace. + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "second-tailnet-credentials", + Namespace: "tailscale", + }, + Data: map[string][]byte{ + "client_id": []byte(secondClientID), + "client_secret": []byte(secondClientSecret), + }, + } + if err := createOrUpdate(ctx, kubeClient, secret); err != nil { + return 0, fmt.Errorf("failed to create second-tailnet-credentials Secret: %w", err) + } + defer kubeClient.Delete(context.Background(), secret) + + // Create the Tailnet resource. + tn := &tsapi.Tailnet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "second-tailnet", + }, + Spec: tsapi.TailnetSpec{ + LoginURL: clusterLoginServer, + Credentials: tsapi.TailnetCredentials{ + SecretName: "second-tailnet-credentials", + }, + }, + } + if err := createOrUpdate(ctx, kubeClient, tn); err != nil { + return 0, fmt.Errorf("failed to create second-tailnet Tailnet: %w", err) + } + defer kubeClient.Delete(context.Background(), tn) + return m.Run(), nil } +func clientIDFromSecret(clientSecret string) (string, error) { + // Format is "tskey-client--". + parts := strings.Split(clientSecret, "-") + if len(parts) != 4 { + return "", fmt.Errorf("secret is not valid") + } + return parts[2], nil +} + func upgraderOrInstaller(cfg *action.Configuration, releaseName string) helmInstallerFunc { hist := action.NewHistory(cfg) hist.Max = 1 @@ -724,3 +831,65 @@ func buildImage(ctx context.Context, dir, repo, target, tag string, extraCACerts return nil } + +func createOrUpdate(ctx context.Context, cl client.Client, obj client.Object) error { + if err := cl.Create(ctx, obj); err != nil { + if !apierrors.IsAlreadyExists(err) { + return err + } + return cl.Update(ctx, obj) + } + return nil +} + +// createTailnet creates a new tailnet and returns a tailscale.Client +// authenticated against it using the bootstrap credentials included in the +// creation response. +func createTailnet(ctx context.Context, tsClient *tailscale.Client) (*tailscale.Client, error) { + tailnetName := fmt.Sprintf("second-tailnet-%d", time.Now().Unix()) + body, err := json.Marshal(map[string]any{"displayName": tailnetName}) + if err != nil { + return nil, fmt.Errorf("failed to marshal tailnet creation request: %w", err) + } + // TODO(beckypauley): change to use a method on tailscale.Client once this is available. + req, _ := http.NewRequestWithContext(ctx, "POST", tsClient.BaseURL.String()+"/api/v2/organizations/-/tailnets", bytes.NewBuffer(body)) + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", tsClient.APIKey)) + resp, err := tsClient.HTTP.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to create tailnet: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + b, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("HTTP %d creating tailnet: %s", resp.StatusCode, string(b)) + } + var result struct { + OauthClient struct { + ID string `json:"id"` + Secret string `json:"secret"` + } `json:"oauthClient"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + return tailscaleClientFromSecret(ctx, tsClient.BaseURL.String(), result.OauthClient.ID, result.OauthClient.Secret) +} + +// tailscaleClientFromSecret exchanges OAuth client credentials for an access token and +// returns a tailscale.Client configured to use it. The token is valid for +// one hour, which is sufficient for the tests to run. No need for refresh logic. +func tailscaleClientFromSecret(ctx context.Context, baseURL, clientID, clientSecret string) (*tailscale.Client, error) { + cfg := clientcredentials.Config{ + ClientID: clientID, + ClientSecret: clientSecret, + TokenURL: fmt.Sprintf("%s/api/v2/oauth/token", baseURL), + } + tk, err := cfg.Token(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get OAuth token for client %q: %w", clientID, err) + } + return &tailscale.Client{ + APIKey: tk.AccessToken, + BaseURL: must.Get(url.Parse(baseURL)), + }, nil +}