cmd/k8s-operator: allow HA ingresses to be deleted when VIP service does not exist (#18050)

This commit fixes a bug in our HA ingress reconciler where ingress resources would
be stuck in a deleting state should their associated VIP service be deleted within
control.

The reconciliation loop would check for the existence of the VIP service and if not
found perform no additional cleanup steps. The code has been modified to continue
onwards even if the VIP service is not found.

Fixes: https://github.com/tailscale/tailscale/issues/18049

Signed-off-by: David Bond <davidsbond93@gmail.com>
This commit is contained in:
David Bond 2025-11-25 12:41:39 +00:00 committed by GitHub
parent 9c3a2aa797
commit d4821cdc2f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 71 additions and 26 deletions

View File

@ -182,9 +182,7 @@ func TestAPIServerProxyReconciler(t *testing.T) {
expectEqual(t, fc, certSecretRoleBinding(pg, ns, defaultDomain))
// Simulate certs being issued; should observe AdvertiseServices config change.
if err := populateTLSSecret(t.Context(), fc, pgName, defaultDomain); err != nil {
t.Fatalf("populating TLS Secret: %v", err)
}
populateTLSSecret(t, fc, pgName, defaultDomain)
expectReconciled(t, r, "", pgName)
expectedCfg.AdvertiseServices = []string{"svc:" + pgName}
@ -247,9 +245,7 @@ func TestAPIServerProxyReconciler(t *testing.T) {
expectMissing[rbacv1.RoleBinding](t, fc, ns, defaultDomain)
// Check we get the new hostname in the status once ready.
if err := populateTLSSecret(t.Context(), fc, pgName, updatedDomain); err != nil {
t.Fatalf("populating TLS Secret: %v", err)
}
populateTLSSecret(t, fc, pgName, updatedDomain)
mustUpdate(t, fc, "operator-ns", "test-pg-0", func(s *corev1.Secret) {
s.Data["profile-foo"] = []byte(`{"AdvertiseServices":["svc:test-pg"],"Config":{"NodeID":"node-foo"}}`)
})

View File

@ -29,6 +29,7 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"tailscale.com/internal/client/tailscale"
"tailscale.com/ipn"
"tailscale.com/ipn/ipnstate"
@ -504,10 +505,7 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string,
logger.Infof("Ensuring that Tailscale Service %q configuration is cleaned up", hostname)
serviceName := tailcfg.ServiceName("svc:" + hostname)
svc, err := r.tsClient.GetVIPService(ctx, serviceName)
if err != nil {
if isErrorTailscaleServiceNotFound(err) {
return false, nil
}
if err != nil && !isErrorTailscaleServiceNotFound(err) {
return false, fmt.Errorf("error getting Tailscale Service: %w", err)
}
@ -713,10 +711,15 @@ func (r *HAIngressReconciler) cleanupTailscaleService(ctx context.Context, svc *
}
if len(o.OwnerRefs) == 1 {
logger.Infof("Deleting Tailscale Service %q", svc.Name)
return false, r.tsClient.DeleteVIPService(ctx, svc.Name)
if err = r.tsClient.DeleteVIPService(ctx, svc.Name); err != nil && !isErrorTailscaleServiceNotFound(err) {
return false, err
}
return false, nil
}
o.OwnerRefs = slices.Delete(o.OwnerRefs, ix, ix+1)
logger.Infof("Deleting Tailscale Service %q", svc.Name)
logger.Infof("Creating/Updating Tailscale Service %q", svc.Name)
json, err := json.Marshal(o)
if err != nil {
return false, fmt.Errorf("error marshalling updated Tailscale Service owner reference: %w", err)

View File

@ -25,6 +25,7 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"tailscale.com/internal/client/tailscale"
"tailscale.com/ipn"
"tailscale.com/ipn/ipnstate"
@ -67,7 +68,7 @@ func TestIngressPGReconciler(t *testing.T) {
// Verify initial reconciliation
expectReconciled(t, ingPGR, "default", "test-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net")
populateTLSSecret(t, fc, "test-pg", "my-svc.ts.net")
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyServeConfig(t, fc, "svc:my-svc", false)
verifyTailscaleService(t, ft, "svc:my-svc", []string{"tcp:443"})
@ -89,7 +90,7 @@ func TestIngressPGReconciler(t *testing.T) {
expectReconciled(t, ingPGR, "default", "test-ingress")
// Verify Tailscale Service uses custom tags
tsSvc, err := ft.GetVIPService(context.Background(), "svc:my-svc")
tsSvc, err := ft.GetVIPService(t.Context(), "svc:my-svc")
if err != nil {
t.Fatalf("getting Tailscale Service: %v", err)
}
@ -134,7 +135,7 @@ func TestIngressPGReconciler(t *testing.T) {
// Verify second Ingress reconciliation
expectReconciled(t, ingPGR, "default", "my-other-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "my-other-svc.ts.net")
populateTLSSecret(t, fc, "test-pg", "my-other-svc.ts.net")
expectReconciled(t, ingPGR, "default", "my-other-ingress")
verifyServeConfig(t, fc, "svc:my-other-svc", false)
verifyTailscaleService(t, ft, "svc:my-other-svc", []string{"tcp:443"})
@ -151,14 +152,14 @@ func TestIngressPGReconciler(t *testing.T) {
verifyTailscaledConfig(t, fc, "test-pg", []string{"svc:my-svc", "svc:my-other-svc"})
// Delete second Ingress
if err := fc.Delete(context.Background(), ing2); err != nil {
if err := fc.Delete(t.Context(), ing2); err != nil {
t.Fatalf("deleting second Ingress: %v", err)
}
expectReconciled(t, ingPGR, "default", "my-other-ingress")
// Verify second Ingress cleanup
cm := &corev1.ConfigMap{}
if err := fc.Get(context.Background(), types.NamespacedName{
if err := fc.Get(t.Context(), types.NamespacedName{
Name: "test-pg-ingress-config",
Namespace: "operator-ns",
}, cm); err != nil {
@ -199,7 +200,7 @@ func TestIngressPGReconciler(t *testing.T) {
expectEqual(t, fc, certSecretRoleBinding(pg, "operator-ns", "my-svc.ts.net"))
// Delete the first Ingress and verify cleanup
if err := fc.Delete(context.Background(), ing); err != nil {
if err := fc.Delete(t.Context(), ing); err != nil {
t.Fatalf("deleting Ingress: %v", err)
}
@ -207,7 +208,7 @@ func TestIngressPGReconciler(t *testing.T) {
// Verify the ConfigMap was cleaned up
cm = &corev1.ConfigMap{}
if err := fc.Get(context.Background(), types.NamespacedName{
if err := fc.Get(t.Context(), types.NamespacedName{
Name: "test-pg-second-ingress-config",
Namespace: "operator-ns",
}, cm); err != nil {
@ -228,6 +229,47 @@ func TestIngressPGReconciler(t *testing.T) {
expectMissing[corev1.Secret](t, fc, "operator-ns", "my-svc.ts.net")
expectMissing[rbacv1.Role](t, fc, "operator-ns", "my-svc.ts.net")
expectMissing[rbacv1.RoleBinding](t, fc, "operator-ns", "my-svc.ts.net")
// Create a third ingress
ing3 := &networkingv1.Ingress{
TypeMeta: metav1.TypeMeta{Kind: "Ingress", APIVersion: "networking.k8s.io/v1"},
ObjectMeta: metav1.ObjectMeta{
Name: "my-other-ingress",
Namespace: "default",
UID: types.UID("5678-UID"),
Annotations: map[string]string{
"tailscale.com/proxy-group": "test-pg",
},
},
Spec: networkingv1.IngressSpec{
IngressClassName: ptr.To("tailscale"),
DefaultBackend: &networkingv1.IngressBackend{
Service: &networkingv1.IngressServiceBackend{
Name: "test",
Port: networkingv1.ServiceBackendPort{
Number: 8080,
},
},
},
TLS: []networkingv1.IngressTLS{
{Hosts: []string{"my-other-svc.tailnetxyz.ts.net"}},
},
},
}
mustCreate(t, fc, ing3)
expectReconciled(t, ingPGR, ing3.Namespace, ing3.Name)
// Delete the service from "control"
ft.vipServices = make(map[tailcfg.ServiceName]*tailscale.VIPService)
// Delete the ingress and confirm we don't get stuck due to the VIP service not existing.
if err = fc.Delete(t.Context(), ing3); err != nil {
t.Fatalf("deleting Ingress: %v", err)
}
expectReconciled(t, ingPGR, ing3.Namespace, ing3.Name)
expectMissing[networkingv1.Ingress](t, fc, ing3.Namespace, ing3.Name)
}
func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) {
@ -262,7 +304,7 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) {
// Verify initial reconciliation
expectReconciled(t, ingPGR, "default", "test-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net")
populateTLSSecret(t, fc, "test-pg", "my-svc.ts.net")
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyServeConfig(t, fc, "svc:my-svc", false)
verifyTailscaleService(t, ft, "svc:my-svc", []string{"tcp:443"})
@ -273,13 +315,13 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) {
ing.Spec.TLS[0].Hosts[0] = "updated-svc"
})
expectReconciled(t, ingPGR, "default", "test-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "updated-svc.ts.net")
populateTLSSecret(t, fc, "test-pg", "updated-svc.ts.net")
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyServeConfig(t, fc, "svc:updated-svc", false)
verifyTailscaleService(t, ft, "svc:updated-svc", []string{"tcp:443"})
verifyTailscaledConfig(t, fc, "test-pg", []string{"svc:updated-svc"})
_, err := ft.GetVIPService(context.Background(), tailcfg.ServiceName("svc:my-svc"))
_, err := ft.GetVIPService(context.Background(), "svc:my-svc")
if err == nil {
t.Fatalf("svc:my-svc not cleaned up")
}
@ -500,7 +542,7 @@ func TestIngressPGReconciler_HTTPEndpoint(t *testing.T) {
// Verify initial reconciliation with HTTP enabled
expectReconciled(t, ingPGR, "default", "test-ingress")
populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net")
populateTLSSecret(t, fc, "test-pg", "my-svc.ts.net")
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyTailscaleService(t, ft, "svc:my-svc", []string{"tcp:80", "tcp:443"})
verifyServeConfig(t, fc, "svc:my-svc", true)
@ -717,7 +759,9 @@ func TestOwnerAnnotations(t *testing.T) {
}
}
func populateTLSSecret(ctx context.Context, c client.Client, pgName, domain string) error {
func populateTLSSecret(t *testing.T, c client.Client, pgName, domain string) {
t.Helper()
secret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: domain,
@ -736,10 +780,12 @@ func populateTLSSecret(ctx context.Context, c client.Client, pgName, domain stri
},
}
_, err := createOrUpdate(ctx, c, "operator-ns", secret, func(s *corev1.Secret) {
_, err := createOrUpdate(t.Context(), c, "operator-ns", secret, func(s *corev1.Secret) {
s.Data = secret.Data
})
return err
if err != nil {
t.Fatalf("failed to populate TLS secret: %v", err)
}
}
func verifyTailscaleService(t *testing.T, ft *fakeTSClient, serviceName string, wantPorts []string) {