mirror of
				https://github.com/tailscale/tailscale.git
				synced 2025-10-30 07:42:12 +01:00 
			
		
		
		
	Previously, the operator checked the ProxyGroup status fields for information on how many of the proxies had successfully authed. Use their state Secrets instead as a more reliable source of truth. containerboot has written device_fqdn and device_ips keys to the state Secret since inception, and pod_uid since 1.78.0, so there's no need to use the API for that data. Read it from the state Secret for consistency. However, to ensure we don't read data from a previous run of containerboot, make sure we reset containerboot's state keys on startup. One other knock-on effect of that is ProxyGroups can briefly be marked not Ready while a Pod is restarting. Introduce a new ProxyGroupAvailable condition to more accurately reflect when downstream controllers can implement flows that rely on a ProxyGroup having at least 1 proxy Pod running. Fixes #16327 Change-Id: I026c18e9d23e87109a471a87b8e4fb6271716a66 Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
		
			
				
	
	
		
			181 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			181 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright (c) Tailscale Inc & AUTHORS
 | |
| // SPDX-License-Identifier: BSD-3-Clause
 | |
| 
 | |
| //go:build !plan9
 | |
| 
 | |
| package main
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"strings"
 | |
| 
 | |
| 	"go.uber.org/zap"
 | |
| 	appsv1 "k8s.io/api/apps/v1"
 | |
| 	corev1 "k8s.io/api/core/v1"
 | |
| 	discoveryv1 "k8s.io/api/discovery/v1"
 | |
| 	apiequality "k8s.io/apimachinery/pkg/api/equality"
 | |
| 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 | |
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 | |
| 	"sigs.k8s.io/controller-runtime/pkg/client"
 | |
| 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 | |
| 	tsoperator "tailscale.com/k8s-operator"
 | |
| 	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
 | |
| 	"tailscale.com/tstime"
 | |
| )
 | |
| 
 | |
// Reasons and message template for the EgressSvcReady condition that Reconcile sets on an egress Service:
//   - reasonReadinessCheckFailed: looking up the EndpointSlice, the ProxyGroup or a proxy Pod returned an error.
//   - reasonClusterResourcesNotReady: the EndpointSlice, the ProxyGroup or a proxy Pod was not found, or the
//     ProxyGroup is not yet available.
//   - reasonNoProxies: the ProxyGroup has 0 replicas.
//   - reasonNotReady, reasonPartiallyReady, reasonReady: none, some or all of the replicas have a ready endpoint.
//   - msgReadyToRouteTemplate: the condition message, formatted with the ready and the total replica counts.
const (
	reasonReadinessCheckFailed     = "ReadinessCheckFailed"
	reasonClusterResourcesNotReady = "ClusterResourcesNotReady"
	reasonNoProxies                = "NoProxiesConfigured"
	reasonNotReady                 = "NotReadyToRouteTraffic"
	reasonReady                    = "ReadyToRouteTraffic"
	reasonPartiallyReady           = "PartiallyReadyToRouteTraffic"
	msgReadyToRouteTemplate        = "%d out of %d replicas are ready to route traffic"
)
 | |
| 
 | |
// egressSvcsReadinessReconciler reconciles egress Services and records on each of them whether its proxies are
// ready to route traffic (the EgressSvcReady condition).
//
// The embedded client.Client is used to read the Service and ProxyGroup and to write the Service status. logger
// is the base logger that each reconcile decorates with the Service name. clock is passed to
// tsoperator.SetServiceCondition when the condition is set. tsNamespace is the namespace in which the
// EndpointSlice for the Service and the ProxyGroup Pods are looked up.
type egressSvcsReadinessReconciler struct {
	client.Client
	logger      *zap.SugaredLogger
	clock       tstime.Clock
	tsNamespace string
}
 | |
| 
 | |
| // Reconcile reconciles an ExternalName Service that defines a tailnet target to be exposed on a ProxyGroup and sets the
 | |
| // EgressSvcReady condition on it. The condition gets set to true if at least one of the proxies is currently ready to
 | |
| // route traffic to the target. It compares proxy Pod IPs with the endpoints set on the EndpointSlice for the egress
 | |
| // service to determine how many replicas are currently able to route traffic.
 | |
| func (esrr *egressSvcsReadinessReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
 | |
| 	l := esrr.logger.With("Service", req.NamespacedName)
 | |
| 	l.Debugf("starting reconcile")
 | |
| 	defer l.Debugf("reconcile finished")
 | |
| 
 | |
| 	svc := new(corev1.Service)
 | |
| 	if err = esrr.Get(ctx, req.NamespacedName, svc); apierrors.IsNotFound(err) {
 | |
| 		l.Debugf("Service not found")
 | |
| 		return res, nil
 | |
| 	} else if err != nil {
 | |
| 		return res, fmt.Errorf("failed to get Service: %w", err)
 | |
| 	}
 | |
| 	var (
 | |
| 		reason, msg string
 | |
| 		st          metav1.ConditionStatus = metav1.ConditionUnknown
 | |
| 	)
 | |
| 	oldStatus := svc.Status.DeepCopy()
 | |
| 	defer func() {
 | |
| 		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, st, reason, msg, esrr.clock, l)
 | |
| 		if !apiequality.Semantic.DeepEqual(oldStatus, &svc.Status) {
 | |
| 			err = errors.Join(err, esrr.Status().Update(ctx, svc))
 | |
| 		}
 | |
| 	}()
 | |
| 
 | |
| 	crl := egressSvcChildResourceLabels(svc)
 | |
| 	eps, err := getSingleObject[discoveryv1.EndpointSlice](ctx, esrr.Client, esrr.tsNamespace, crl)
 | |
| 	if err != nil {
 | |
| 		err = fmt.Errorf("error getting EndpointSlice: %w", err)
 | |
| 		reason = reasonReadinessCheckFailed
 | |
| 		msg = err.Error()
 | |
| 		return res, err
 | |
| 	}
 | |
| 	if eps == nil {
 | |
| 		l.Infof("EndpointSlice for Service does not yet exist, waiting...")
 | |
| 		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
 | |
| 		st = metav1.ConditionFalse
 | |
| 		return res, nil
 | |
| 	}
 | |
| 	pg := &tsapi.ProxyGroup{
 | |
| 		ObjectMeta: metav1.ObjectMeta{
 | |
| 			Name: svc.Annotations[AnnotationProxyGroup],
 | |
| 		},
 | |
| 	}
 | |
| 	err = esrr.Get(ctx, client.ObjectKeyFromObject(pg), pg)
 | |
| 	if apierrors.IsNotFound(err) {
 | |
| 		l.Infof("ProxyGroup for Service does not exist, waiting...")
 | |
| 		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
 | |
| 		st = metav1.ConditionFalse
 | |
| 		return res, nil
 | |
| 	}
 | |
| 	if err != nil {
 | |
| 		err = fmt.Errorf("error retrieving ProxyGroup: %w", err)
 | |
| 		reason = reasonReadinessCheckFailed
 | |
| 		msg = err.Error()
 | |
| 		return res, err
 | |
| 	}
 | |
| 	if !tsoperator.ProxyGroupAvailable(pg) {
 | |
| 		l.Infof("ProxyGroup for Service is not ready, waiting...")
 | |
| 		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
 | |
| 		st = metav1.ConditionFalse
 | |
| 		return res, nil
 | |
| 	}
 | |
| 
 | |
| 	replicas := pgReplicas(pg)
 | |
| 	if replicas == 0 {
 | |
| 		l.Infof("ProxyGroup replicas set to 0")
 | |
| 		reason, msg = reasonNoProxies, reasonNoProxies
 | |
| 		st = metav1.ConditionFalse
 | |
| 		return res, nil
 | |
| 	}
 | |
| 	podLabels := pgLabels(pg.Name, nil)
 | |
| 	var readyReplicas int32
 | |
| 	for i := range replicas {
 | |
| 		podLabels[appsv1.PodIndexLabel] = fmt.Sprintf("%d", i)
 | |
| 		pod, err := getSingleObject[corev1.Pod](ctx, esrr.Client, esrr.tsNamespace, podLabels)
 | |
| 		if err != nil {
 | |
| 			err = fmt.Errorf("error retrieving ProxyGroup Pod: %w", err)
 | |
| 			reason = reasonReadinessCheckFailed
 | |
| 			msg = err.Error()
 | |
| 			return res, err
 | |
| 		}
 | |
| 		if pod == nil {
 | |
| 			l.Warnf("[unexpected] ProxyGroup is ready, but replica %d was not found", i)
 | |
| 			reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
 | |
| 			return res, nil
 | |
| 		}
 | |
| 		l.Debugf("looking at Pod with IPs %v", pod.Status.PodIPs)
 | |
| 		ready := false
 | |
| 		for _, ep := range eps.Endpoints {
 | |
| 			l.Debugf("looking at endpoint with addresses %v", ep.Addresses)
 | |
| 			if endpointReadyForPod(&ep, pod, l) {
 | |
| 				l.Debugf("endpoint is ready for Pod")
 | |
| 				ready = true
 | |
| 				break
 | |
| 			}
 | |
| 		}
 | |
| 		if ready {
 | |
| 			readyReplicas++
 | |
| 		}
 | |
| 	}
 | |
| 	msg = fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas)
 | |
| 	if readyReplicas == 0 {
 | |
| 		reason = reasonNotReady
 | |
| 		st = metav1.ConditionFalse
 | |
| 		return res, nil
 | |
| 	}
 | |
| 	st = metav1.ConditionTrue
 | |
| 	if readyReplicas < replicas {
 | |
| 		reason = reasonPartiallyReady
 | |
| 	} else {
 | |
| 		reason = reasonReady
 | |
| 	}
 | |
| 	return res, nil
 | |
| }
 | |
| 
 | |
| // endpointReadyForPod returns true if the endpoint is for the Pod's IPv4 address and is ready to serve traffic.
 | |
| // Endpoint must not be nil.
 | |
| func endpointReadyForPod(ep *discoveryv1.Endpoint, pod *corev1.Pod, l *zap.SugaredLogger) bool {
 | |
| 	podIP, err := podIPv4(pod)
 | |
| 	if err != nil {
 | |
| 		l.Warnf("[unexpected] error retrieving Pod's IPv4 address: %v", err)
 | |
| 		return false
 | |
| 	}
 | |
| 	// Currently we only ever set a single address on and Endpoint and nothing else is meant to modify this.
 | |
| 	if len(ep.Addresses) != 1 {
 | |
| 		return false
 | |
| 	}
 | |
| 	return strings.EqualFold(ep.Addresses[0], podIP) &&
 | |
| 		*ep.Conditions.Ready &&
 | |
| 		*ep.Conditions.Serving &&
 | |
| 		!*ep.Conditions.Terminating
 | |
| }
 |