cmd/k8s-operator: add e2e test setup and l7 ingress test for multi-tailnet (#19426)

This change adds setup for a second tailnet to enable multi-tailnet e2e
tests. When running against devcontrol, a second tailnet is created via the
API. Otherwise, credentials are read from SECOND_TS_API_CLIENT_SECRET.

Also adds an l7 HA Ingress test for multi-tailnet.

Fixes tailscale/corp#37498

Signed-off-by: Becky Pauley <becky@tailscale.com>
This commit is contained in:
BeckyPauley 2026-04-17 17:03:25 +01:00 committed by GitHub
parent d52ae45e9b
commit b239e92eb6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 330 additions and 29 deletions

View File

@ -17,9 +17,11 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"tailscale.com/client/tailscale/v2"
kube "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/kube/kubetypes"
"tailscale.com/tsnet"
"tailscale.com/tstest"
"tailscale.com/util/httpm"
)
@ -274,6 +276,86 @@ func TestL7HAIngress(t *testing.T) {
}
}
// TestL7HAIngressMultiTailnet checks that an L7 HA Ingress bound to a
// ProxyGroup configured for the second tailnet ends up registered in, and
// reachable from, that tailnet.
//
// It creates an nginx Deployment and Service, one ingress ProxyGroup per
// tailnet, verifies that each ProxyGroup joined the expected tailnet, and then
// exposes nginx through an Ingress that uses the second tailnet's ProxyGroup.
func TestL7HAIngressMultiTailnet(t *testing.T) {
	// secondTSClient is dereferenced below when looking up the Tailscale
	// Service, so it must be part of the guard alongside the tsnet clients.
	if tnClient == nil || secondTNClient == nil || secondTSClient == nil {
		t.Skip("TestL7HAIngressMultiTailnet requires a working tailnet client for a first and second tailnet")
	}

	// Apply nginx Deployment and Service.
	createAndCleanup(t, kubeClient, nginxDeployment(ns, "nginx"))
	createAndCleanup(t, kubeClient, &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "nginx",
			Namespace: ns,
		},
		Spec: corev1.ServiceSpec{
			Selector: map[string]string{
				"app.kubernetes.io/name": "nginx",
			},
			Ports: []corev1.ServicePort{
				{
					Name: "http",
					Port: 80,
				},
			},
		},
	})

	// Create Ingress ProxyGroup for each Tailnet.
	firstTailnetPG := &tsapi.ProxyGroup{
		ObjectMeta: metav1.ObjectMeta{
			Name: "first-tailnet",
		},
		Spec: tsapi.ProxyGroupSpec{
			Type: tsapi.ProxyGroupTypeIngress,
		},
	}
	createAndCleanup(t, kubeClient, firstTailnetPG)

	secondTailnetPG := &tsapi.ProxyGroup{
		ObjectMeta: metav1.ObjectMeta{
			Name: "second-tailnet",
		},
		Spec: tsapi.ProxyGroupSpec{
			Type:    tsapi.ProxyGroupTypeIngress,
			Tailnet: "second-tailnet",
		},
	}
	createAndCleanup(t, kubeClient, secondTailnetPG)

	if err := verifyProxyGroupTailnet(t, firstTailnetPG, tnClient); err != nil {
		t.Fatalf("verifying ProxyGroup %s is registered to the correct tailnet: %v", firstTailnetPG.Name, err)
	}
	if err := verifyProxyGroupTailnet(t, secondTailnetPG, secondTNClient); err != nil {
		t.Fatalf("verifying ProxyGroup %s is registered to the correct tailnet: %v", secondTailnetPG.Name, err)
	}

	// Apply Ingress to expose nginx. The annotation refers to the ProxyGroup
	// by name, so derive it from the object rather than repeating the literal.
	name := generateName("test-ingress")
	ingress := l7Ingress(ns, name, map[string]string{
		"tailscale.com/proxy-group": secondTailnetPG.Name,
	})
	createAndCleanup(t, kubeClient, ingress)

	// Check that the tailscale (VIP) Service has been created in the expected Tailnet.
	svcName := "svc:" + name
	if err := tstest.WaitFor(3*time.Minute, func() error {
		_, err := secondTSClient.VIPServices().Get(t.Context(), svcName)
		if tailscale.IsNotFound(err) {
			return fmt.Errorf("Tailscale service %q not yet in expected tailnet", svcName)
		}
		return err
	}); err != nil {
		t.Fatalf("Tailscale service %q never appeared in expected tailnet: %v", svcName, err)
	}

	hostname, err := waitForIngressHostname(t, ns, name)
	if err != nil {
		t.Fatalf("error waiting for Ingress hostname: %v", err)
	}

	// Reach the Ingress through the tsnet client that lives on the second tailnet.
	if err := testIngressIsReachable(t, newHTTPClient(secondTNClient), fmt.Sprintf("https://%s:443", hostname)); err != nil {
		t.Fatal(err)
	}
}
func l7Ingress(namespace, name string, annotations map[string]string) *networkingv1.Ingress {
ingress := &networkingv1.Ingress{
ObjectMeta: metav1.ObjectMeta{
@ -406,6 +488,56 @@ func testIngressIsReachable(t *testing.T, httpClient *http.Client, url string) e
return nil
}
// verifyProxyGroupTailnet verifies that a ProxyGroup is registered to the correct tailnet.
// This is done by getting the expected tailnet domain for the tailnet client,
// and comparing this with the actual device fqdn in the ProxyGroup state secret.
//
// cl is the tsnet server that is joined to the tailnet pg is expected to be on.
// The function polls for up to three minutes and returns an error, wrapping
// the last failure, if the domains never match.
func verifyProxyGroupTailnet(t *testing.T, pg *tsapi.ProxyGroup, cl *tsnet.Server) error {
	t.Helper()

	// Determine the expected tailnet Magic DNS Name.
	lc, err := cl.LocalClient()
	if err != nil {
		return fmt.Errorf("getting local client: %w", err)
	}
	status, err := lc.Status(t.Context())
	if err != nil {
		return fmt.Errorf("getting tailnet client status: %w", err)
	}
	// Self is a pointer; guard it so an unexpected status yields an error
	// instead of a nil dereference.
	if status.Self == nil {
		return fmt.Errorf("tailnet client status has no self node")
	}
	// The DNS name is "<host>.<tailnet domain>."; keep everything after the host.
	_, expectedTailnet, ok := strings.Cut(strings.TrimSuffix(status.Self.DNSName, "."), ".")
	if !ok {
		return fmt.Errorf("unexpected DNSName format %q", status.Self.DNSName)
	}

	// Read the device FQDN from the first state secret for the ProxyGroup,
	// and verify that this matches the expected tailnet.
	if err := tstest.WaitFor(3*time.Minute, func() error {
		var secrets corev1.SecretList
		if err := kubeClient.List(t.Context(), &secrets,
			client.InNamespace("tailscale"),
			client.MatchingLabels{
				kubetypes.LabelSecretType:            kubetypes.LabelSecretTypeState,
				"tailscale.com/parent-resource-type": "proxygroup",
				"tailscale.com/parent-resource":      pg.Name,
			},
		); err != nil {
			return err
		}
		if len(secrets.Items) == 0 {
			return fmt.Errorf("no state secrets found for ProxyGroup %q yet", pg.Name)
		}
		// A missing or not-yet-populated FQDN key yields an empty string,
		// which fails the Cut below and is retried.
		fqdn := strings.TrimSuffix(string(secrets.Items[0].Data[kubetypes.KeyDeviceFQDN]), ".")
		_, tailnet, ok := strings.Cut(fqdn, ".")
		if !ok {
			return fmt.Errorf("ProxyGroup %q: device FQDN %q has no domain yet", pg.Name, fqdn)
		}
		if tailnet != expectedTailnet {
			return fmt.Errorf("ProxyGroup %q on wrong tailnet: got domain %q, want %q", pg.Name, tailnet, expectedTailnet)
		}
		return nil
	}); err != nil {
		// Wrap with %w so callers can still inspect the underlying cause.
		return fmt.Errorf("ProxyGroup %q not on expected tailnet: %w", pg.Name, err)
	}
	return nil
}
func waitForIngressHostname(t *testing.T, namespace, name string) (string, error) {
t.Helper()
var hostname string

View File

@ -4,6 +4,7 @@
package e2e
import (
"bytes"
"context"
"crypto/rand"
"crypto/tls"
@ -39,6 +40,7 @@ import (
"helm.sh/helm/v3/pkg/release"
"helm.sh/helm/v3/pkg/storage/driver"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/rest"
@ -69,10 +71,13 @@ const (
)
var (
tsClient *tailscale.Client // For API calls to control.
tnClient *tsnet.Server // For testing real tailnet traffic.
restCfg *rest.Config // For constructing a client-go client if necessary.
kubeClient client.WithWatch // For k8s API calls.
tsClient *tailscale.Client // For API calls to control.
tnClient *tsnet.Server // For testing real tailnet traffic on first tailnet.
secondTSClient *tailscale.Client // For API calls to the secondary tailnet (_second_tailnet).
secondTNClient *tsnet.Server // For testing real tailnet traffic on second tailnet.
restCfg *rest.Config // For constructing a client-go client if necessary.
kubeClient client.WithWatch // For k8s API calls.
clusterLoginServer string
//go:embed certs/pebble.minica.crt
pebbleMiniCACert []byte
@ -157,11 +162,11 @@ func runTests(m *testing.M) (int, error) {
}
var (
clusterLoginServer string // Login server from cluster Pod point of view.
clientID, clientSecret string // OAuth client for the operator to use.
clientID, clientSecret string // OAuth client for the first tailnet (for the operator to use).
caPaths []string // Extra CA cert file paths to add to images.
certsDir = filepath.Join(tmp, "certs") // Directory containing extra CA certs to add to images.
certsDir = filepath.Join(tmp, "certs") // Directory containing extra CA certs to add to images.
secondClientID, secondClientSecret string // OAuth client for the second tailnet (for the operator to use).
)
if *fDevcontrol {
// Deploy pebble and get its certs.
@ -279,7 +284,7 @@ func runTests(m *testing.M) (int, error) {
return 0, fmt.Errorf("failed to set policy file: %w", err)
}
logger.Infof("ACLs configured")
logger.Info("ACLs configured for first tailnet")
key, err := tsClient.Keys().CreateOAuthClient(ctx, tailscale.CreateOAuthClientRequest{
Scopes: []string{"auth_keys", "devices:core", "services"},
@ -287,36 +292,77 @@ func runTests(m *testing.M) (int, error) {
Description: "k8s-operator client for e2e tests",
})
if err != nil {
return 0, fmt.Errorf("failed to marshal OAuth client creation request: %w", err)
return 0, fmt.Errorf("failed to create OAuth client for first tailnet: %w", err)
}
clientID = key.ID
clientSecret = key.Key
logger.Info("OAuth credentials set for first tailnet")
// Create second tailnet. The bootstrap credentials returned have 'all'
// permissions: they are used for administrative actions and to create a
// separately scoped OAuth client for the k8s operator.
bootstrapClient, err := createTailnet(ctx, tsClient)
if err != nil {
return 0, fmt.Errorf("failed to create second tailnet: %w", err)
}
// Set HTTPS on second tailnet.
err = bootstrapClient.TailnetSettings().Update(ctx, tailscale.UpdateTailnetSettingsRequest{HTTPSEnabled: new(true)})
if err != nil {
return 0, fmt.Errorf("failed to configure https for second tailnet: %w", err)
}
logger.Info("HTTPS settings configured for second tailnet")
// Set ACLs for second tailnet.
if err = bootstrapClient.PolicyFile().Set(ctx, string(requiredACLs), ""); err != nil {
return 0, fmt.Errorf("failed to set policy file: %w", err)
}
logger.Info("ACLs configured for second tailnet")
// Create an OAuth client for the second tailnet to be used
// by the k8s-operator.
secondKey, err := bootstrapClient.Keys().CreateOAuthClient(ctx, tailscale.CreateOAuthClientRequest{
Scopes: []string{"auth_keys", "devices:core", "services"},
Tags: []string{"tag:k8s-operator"},
Description: "k8s-operator client for e2e tests",
})
if err != nil {
return 0, fmt.Errorf("failed to create OAuth client for second tailnet: %w", err)
}
secondClientID = secondKey.ID
secondClientSecret = secondKey.Key
secondTSClient, err = tailscaleClientFromSecret(ctx, "http://localhost:31544", secondClientID, secondClientSecret)
if err != nil {
return 0, fmt.Errorf("failed to set up second tailnet client: %w", err)
}
} else {
clientSecret = os.Getenv("TS_API_CLIENT_SECRET")
if clientSecret == "" {
return 0, fmt.Errorf("must use --devcontrol or set TS_API_CLIENT_SECRET to an OAuth client suitable for the operator")
}
// Format is "tskey-client-<id>-<random>".
parts := strings.Split(clientSecret, "-")
if len(parts) != 4 {
return 0, fmt.Errorf("TS_API_CLIENT_SECRET is not valid")
}
clientID = parts[2]
credentials := clientcredentials.Config{
ClientID: clientID,
ClientSecret: clientSecret,
TokenURL: fmt.Sprintf("%s/api/v2/oauth/token", ipn.DefaultControlURL),
Scopes: []string{"auth_keys"},
}
tk, err := credentials.Token(ctx)
clientID, err = clientIDFromSecret(clientSecret)
if err != nil {
return 0, fmt.Errorf("failed to get OAuth token: %w", err)
return 0, fmt.Errorf("failed to get client id from secret: %w", err)
}
// An access token will last for an hour which is plenty of time for
// the tests to run. No need for token refresh logic.
tsClient = &tailscale.Client{
APIKey: tk.AccessToken,
tsClient, err = tailscaleClientFromSecret(ctx, ipn.DefaultControlURL, clientID, clientSecret)
if err != nil {
return 0, fmt.Errorf("failed to set up first tailnet client: %w", err)
}
secondClientSecret = os.Getenv("SECOND_TS_API_CLIENT_SECRET")
if secondClientSecret == "" {
return 0, fmt.Errorf("must use --devcontrol or set SECOND_TS_API_CLIENT_SECRET to an OAuth client suitable for the operator")
}
secondClientID, err = clientIDFromSecret(secondClientSecret)
if err != nil {
return 0, fmt.Errorf("failed to get client id from secret: %w", err)
}
secondTSClient, err = tailscaleClientFromSecret(ctx, ipn.DefaultControlURL, secondClientID, secondClientSecret)
if err != nil {
return 0, fmt.Errorf("failed to set up second tailnet client: %w", err)
}
}
@ -446,10 +492,16 @@ func runTests(m *testing.M) (int, error) {
authKey, err := tsClient.Keys().CreateAuthKey(ctx, tailscale.CreateKeyRequest{Capabilities: caps})
if err != nil {
return 0, err
return 0, fmt.Errorf("failed to create auth key for first tailnet: %w", err)
}
defer tsClient.Keys().Delete(context.Background(), authKey.ID)
secondAuthKey, err := secondTSClient.Keys().CreateAuthKey(ctx, tailscale.CreateKeyRequest{Capabilities: caps})
if err != nil {
return 0, fmt.Errorf("failed to create auth key for second tailnet: %w", err)
}
defer secondTSClient.Keys().Delete(context.Background(), secondAuthKey.ID)
tnClient = &tsnet.Server{
ControlURL: tsClient.BaseURL.String(),
Hostname: "test-proxy",
@ -463,9 +515,64 @@ func runTests(m *testing.M) (int, error) {
}
defer tnClient.Close()
secondTNClient = &tsnet.Server{
ControlURL: secondTSClient.BaseURL.String(),
Hostname: "test-proxy",
Ephemeral: true,
Store: &mem.Store{},
AuthKey: secondAuthKey.Key,
}
_, err = secondTNClient.Up(ctx)
if err != nil {
return 0, err
}
defer secondTNClient.Close()
// Create the tailnet Secret in the tailscale namespace.
secret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "second-tailnet-credentials",
Namespace: "tailscale",
},
Data: map[string][]byte{
"client_id": []byte(secondClientID),
"client_secret": []byte(secondClientSecret),
},
}
if err := createOrUpdate(ctx, kubeClient, secret); err != nil {
return 0, fmt.Errorf("failed to create second-tailnet-credentials Secret: %w", err)
}
defer kubeClient.Delete(context.Background(), secret)
// Create the Tailnet resource.
tn := &tsapi.Tailnet{
ObjectMeta: metav1.ObjectMeta{
Name: "second-tailnet",
},
Spec: tsapi.TailnetSpec{
LoginURL: clusterLoginServer,
Credentials: tsapi.TailnetCredentials{
SecretName: "second-tailnet-credentials",
},
},
}
if err := createOrUpdate(ctx, kubeClient, tn); err != nil {
return 0, fmt.Errorf("failed to create second-tailnet Tailnet: %w", err)
}
defer kubeClient.Delete(context.Background(), tn)
return m.Run(), nil
}
// clientIDFromSecret extracts the OAuth client ID from an OAuth client secret
// of the form "tskey-client-<id>-<random>".
//
// It returns an error unless the secret consists of exactly four
// dash-separated parts, starts with "tskey-client", and has non-empty <id> and
// <random> parts. The secret itself is never included in the error text.
func clientIDFromSecret(clientSecret string) (string, error) {
	const format = "tskey-client-<id>-<random>"

	parts := strings.Split(clientSecret, "-")
	switch {
	case len(parts) != 4,
		parts[0] != "tskey",
		parts[1] != "client",
		parts[2] == "",
		parts[3] == "":
		// Reject malformed input here rather than handing a bogus client ID
		// to the OAuth token exchange.
		return "", fmt.Errorf("secret is not valid: want format %q", format)
	}
	return parts[2], nil
}
func upgraderOrInstaller(cfg *action.Configuration, releaseName string) helmInstallerFunc {
hist := action.NewHistory(cfg)
hist.Max = 1
@ -724,3 +831,65 @@ func buildImage(ctx context.Context, dir, repo, target, tag string, extraCACerts
return nil
}
// createOrUpdate creates obj, or, if an object with the same key already
// exists, overwrites the existing object with obj.
//
// Any error other than AlreadyExists from Create is returned unchanged. On
// AlreadyExists the live object is fetched and its resourceVersion is copied
// onto obj before calling Update.
func createOrUpdate(ctx context.Context, cl client.Client, obj client.Object) error {
	err := cl.Create(ctx, obj)
	if err == nil || !apierrors.IsAlreadyExists(err) {
		return err
	}

	// The API server rejects updates to custom resources (such as Tailnet)
	// that carry no metadata.resourceVersion, so read the current version
	// from the live object first. Fetch into a copy so obj's desired state is
	// not overwritten by what is in the cluster.
	existing, ok := obj.DeepCopyObject().(client.Object)
	if !ok {
		return fmt.Errorf("%T does not implement client.Object", obj)
	}
	if err := cl.Get(ctx, client.ObjectKeyFromObject(obj), existing); err != nil {
		return fmt.Errorf("getting existing %T %q: %w", obj, obj.GetName(), err)
	}
	obj.SetResourceVersion(existing.GetResourceVersion())
	return cl.Update(ctx, obj)
}
// createTailnet creates a new tailnet and returns a tailscale.Client
// authenticated against it using the bootstrap credentials included in the
// creation response.
//
// The request is sent to tsClient's base URL and authorised with tsClient's
// API key. The new tailnet's display name is "second-tailnet-<unix time>".
// An error is returned if the request cannot be built or sent, if the server
// answers with anything other than 200 OK, or if the response carries no
// OAuth client credentials.
func createTailnet(ctx context.Context, tsClient *tailscale.Client) (*tailscale.Client, error) {
	tailnetName := fmt.Sprintf("second-tailnet-%d", time.Now().Unix())
	body, err := json.Marshal(map[string]any{"displayName": tailnetName})
	if err != nil {
		return nil, fmt.Errorf("failed to marshal tailnet creation request: %w", err)
	}

	// TODO(beckypauley): change to use a method on tailscale.Client once this is available.
	endpoint := tsClient.BaseURL.String() + "/api/v2/organizations/-/tailnets"
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("failed to build tailnet creation request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+tsClient.APIKey)
	req.Header.Set("Content-Type", "application/json")

	// Fall back to the default client so a tailscale.Client whose HTTP field
	// has not been populated does not cause a nil dereference.
	httpClient := tsClient.HTTP
	if httpClient == nil {
		httpClient = http.DefaultClient
	}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to create tailnet: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: include whatever body the server sent in the error.
		b, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("HTTP %d creating tailnet: %s", resp.StatusCode, string(b))
	}

	var result struct {
		OauthClient struct {
			ID     string `json:"id"`
			Secret string `json:"secret"`
		} `json:"oauthClient"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}
	// Catch a response without credentials here rather than as an opaque
	// OAuth token failure later.
	if result.OauthClient.ID == "" || result.OauthClient.Secret == "" {
		return nil, fmt.Errorf("tailnet creation response did not include OAuth client credentials")
	}

	return tailscaleClientFromSecret(ctx, tsClient.BaseURL.String(), result.OauthClient.ID, result.OauthClient.Secret)
}
// tailscaleClientFromSecret exchanges OAuth client credentials for an access token and
// returns a tailscale.Client configured to use it. The token is valid for
// one hour, which is sufficient for the tests to run. No need for refresh logic.
//
// baseURL is the control server URL; the token endpoint is
// baseURL + "/api/v2/oauth/token". An error is returned if baseURL cannot be
// parsed or if the token exchange fails.
func tailscaleClientFromSecret(ctx context.Context, baseURL, clientID, clientSecret string) (*tailscale.Client, error) {
	// Parse up front so a malformed URL is reported as an error, before any
	// network round trip, instead of panicking after the token was fetched.
	parsedURL, err := url.Parse(baseURL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse base URL %q: %w", baseURL, err)
	}

	cfg := clientcredentials.Config{
		ClientID:     clientID,
		ClientSecret: clientSecret,
		TokenURL:     fmt.Sprintf("%s/api/v2/oauth/token", baseURL),
	}
	tk, err := cfg.Token(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get OAuth token for client %q: %w", clientID, err)
	}

	return &tailscale.Client{
		APIKey:  tk.AccessToken,
		BaseURL: parsedURL,
	}, nil
}