This file was never truly necessary and has never actually been used in the history of Tailscale's open source releases.

A Brief History of AUTHORS files
---

The AUTHORS file was a pattern developed at Google, originally for Chromium, then adopted by Go and a bunch of other projects. The problem was that Chromium originally had a copyright line only recognizing Google as the copyright holder. Because Google (and most open source projects) do not require copyright assignment for contributions, each contributor maintains their copyright. Some large corporate contributors then tried to add their own name to the copyright line in the LICENSE file or in file headers. This quickly becomes unwieldy, and puts a tremendous burden on anyone building on top of Chromium, since the license requires that they keep all copyright lines intact.

The compromise was to create an AUTHORS file that would list all of the copyright holders. The LICENSE file and source file headers would then include that list by reference, listing the copyright holder as "The Chromium Authors". This also became cumbersome to keep up to date with a high rate of new contributors. Plus, it's not always obvious who the copyright holder is. Sometimes it is the individual making the contribution, but many times it may be their employer. There is no way for the project maintainer to know. Eventually, Google changed their policy to no longer recommend trying to keep the AUTHORS file up to date proactively, and instead to only add to it when requested: https://opensource.google/docs/releasing/authors. They are also clear that:

> Adding contributors to the AUTHORS file is entirely within the
> project's discretion and has no implications for copyright ownership.

It was primarily added to appease a small number of large contributors that insisted that they be recognized as copyright holders (which was entirely their right to do). But it's not truly necessary, and not even the most accurate way of identifying contributors and/or copyright holders.

In practice, we've never added anyone to our AUTHORS file. It only lists Tailscale, so it's not really serving any purpose. It also causes confusion because Tailscalars put the "Tailscale Inc & AUTHORS" header in other open source repos which don't actually have an AUTHORS file, so it's ambiguous what that means.

Instead, we just acknowledge that the contributors to Tailscale (whoever they are) are copyright holders for their individual contributions. We also have the benefit of using the DCO (developercertificate.org), which provides some additional certification of their right to make the contribution.

The source file changes were purely mechanical with:

    git ls-files | xargs sed -i -e 's/\(Tailscale Inc &\) AUTHORS/\1 contributors/g'

Updates #cleanup

Change-Id: Ia101a4a3005adb9118051b3416f5a64a4a45987d
Signed-off-by: Will Norris <will@tailscale.com>
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause

//go:build !plan9

package main

import (
	"context"
	"crypto/sha256"
	"encoding/json"
	"errors"
	"fmt"
	"math/rand/v2"
	"reflect"
	"slices"
	"strings"
	"sync"

	"go.uber.org/zap"
	corev1 "k8s.io/api/core/v1"
	discoveryv1 "k8s.io/api/discovery/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apiserver/pkg/storage/names"
	"k8s.io/client-go/tools/record"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
	tsoperator "tailscale.com/k8s-operator"
	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
	"tailscale.com/kube/egressservices"
	"tailscale.com/kube/kubetypes"
	"tailscale.com/tstime"
	"tailscale.com/util/clientmetric"
	"tailscale.com/util/mak"
	"tailscale.com/util/set"
)

const (
	reasonEgressSvcInvalid        = "EgressSvcInvalid"
	reasonEgressSvcValid          = "EgressSvcValid"
	reasonEgressSvcCreationFailed = "EgressSvcCreationFailed"
	reasonProxyGroupNotReady      = "ProxyGroupNotReady"

	labelProxyGroup = "tailscale.com/proxy-group"

	labelSvcType = "tailscale.com/svc-type" // ingress or egress
	typeEgress   = "egress"
	// maxPorts is the maximum number of ports that can be exposed on a
	// container. In practice this will be ports in range [10000 - 11000). The
	// high range should make it easier to distinguish container ports from
	// the tailnet target ports for debugging purposes (i.e. when reading
	// netfilter rules). The limit of 1000 is somewhat arbitrary; the
	// assumption is that it will not be hit in practice.
	maxPorts = 1000

	indexEgressProxyGroup = ".metadata.annotations.egress-proxy-group"

	tsHealthCheckPortName = "tailscale-health-check"
)

var gaugeEgressServices = clientmetric.NewGauge(kubetypes.MetricEgressServiceCount)

// egressSvcsReconciler reconciles user-created ExternalName Services that specify a tailnet
// endpoint that should be exposed to cluster workloads and an egress ProxyGroup
// on whose proxies it should be exposed.
type egressSvcsReconciler struct {
	client.Client
	logger      *zap.SugaredLogger
	recorder    record.EventRecorder
	clock       tstime.Clock
	tsNamespace string

	mu   sync.Mutex            // protects following
	svcs set.Slice[types.UID] // UIDs of all currently managed egress Services for ProxyGroup
}
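
// For orientation, a minimal sketch (not part of this file) of the kind of
// user-created ExternalName Service this reconciler acts on. The names,
// namespace and annotation values below are illustrative assumptions only;
// the annotation keys are the AnnotationProxyGroup and
// AnnotationTailnetTargetFQDN constants referenced throughout this package.
//
//	svc := &corev1.Service{
//		ObjectMeta: metav1.ObjectMeta{
//			Name:      "db",
//			Namespace: "default",
//			Annotations: map[string]string{
//				AnnotationProxyGroup:        "egress-proxies",    // assumed ProxyGroup name
//				AnnotationTailnetTargetFQDN: "db.example.ts.net", // assumed tailnet target
//			},
//		},
//		Spec: corev1.ServiceSpec{
//			Type:         corev1.ServiceTypeExternalName,
//			ExternalName: "placeholder", // rewritten by the reconciler to the ClusterIP Service FQDN
//			Ports:        []corev1.ServicePort{{Name: "postgres", Port: 5432, Protocol: corev1.ProtocolTCP}},
//		},
//	}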

// Reconcile reconciles an ExternalName Service that specifies a tailnet target and a ProxyGroup whose proxies should
// forward cluster traffic to the target.
// For an ExternalName Service the reconciler:
//
// - for each port N defined on the ExternalName Service, allocates a container port X in range [10000 - 11000),
// unique across the ProxyGroup proxies. Proxies will forward cluster traffic received on port X to port N on the
// tailnet target
//
// - creates a ClusterIP Service in the operator's namespace with portmappings for all N->X port pairs. This will allow
// cluster workloads to send traffic on the user-defined tailnet target port and get it transparently mapped to the
// randomly selected port on proxy Pods.
//
// - creates an EndpointSlice in the operator's namespace with the kubernetes.io/service-name label pointing to the
// ClusterIP Service. The endpoints will get dynamically updated to proxy Pod IPs as the Pods become ready to route
// traffic to the tailnet target. The kubernetes.io/service-name label ensures that kube-proxy sets up routing rules to
// forward cluster traffic received on the ClusterIP Service's IP address to the endpoints (Pod IPs).
//
// - updates the egress service config in a ConfigMap mounted to the ProxyGroup proxies with the tailnet target and the
// portmappings.
func (esr *egressSvcsReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
	lg := esr.logger.With("Service", req.NamespacedName)
	defer lg.Info("reconcile finished")

	svc := new(corev1.Service)
	if err = esr.Get(ctx, req.NamespacedName, svc); apierrors.IsNotFound(err) {
		lg.Info("Service not found")
		return res, nil
	} else if err != nil {
		return res, fmt.Errorf("failed to get Service: %w", err)
	}

	// Name of the 'egress service', meaning the tailnet target.
	tailnetSvc := tailnetSvcName(svc)
	lg = lg.With("tailnet-service", tailnetSvc)

	// Note that resources for egress Services are only cleaned up when the
	// Service is actually deleted (and not if, for example, a user decides to
	// remove the Tailscale annotation from it). This should be fine; we
	// assume that egress ExternalName Services are always created
	// specifically for the Tailscale operator.
	if !svc.DeletionTimestamp.IsZero() {
		lg.Info("Service is being deleted, ensuring resource cleanup")
		return res, esr.maybeCleanup(ctx, svc, lg)
	}

	oldStatus := svc.Status.DeepCopy()
	defer func() {
		if !apiequality.Semantic.DeepEqual(oldStatus, &svc.Status) {
			err = errors.Join(err, esr.Status().Update(ctx, svc))
		}
	}()

	// Validate the user-created ExternalName Service and the associated ProxyGroup.
	if ok, err := esr.validateClusterResources(ctx, svc, lg); err != nil {
		return res, fmt.Errorf("error validating cluster resources: %w", err)
	} else if !ok {
		return res, nil
	}

	if !slices.Contains(svc.Finalizers, FinalizerName) {
		svc.Finalizers = append(svc.Finalizers, FinalizerName)
		if err := esr.updateSvcSpec(ctx, svc); err != nil {
			err := fmt.Errorf("failed to add finalizer: %w", err)
			r := svcConfiguredReason(svc, false, lg)
			tsoperator.SetServiceCondition(svc, tsapi.EgressSvcConfigured, metav1.ConditionFalse, r, err.Error(), esr.clock, lg)
			return res, err
		}
		esr.mu.Lock()
		esr.svcs.Add(svc.UID)
		gaugeEgressServices.Set(int64(esr.svcs.Len()))
		esr.mu.Unlock()
	}

	if err := esr.maybeCleanupProxyGroupConfig(ctx, svc, lg); err != nil {
		err = fmt.Errorf("cleaning up resources for previous ProxyGroup failed: %w", err)
		r := svcConfiguredReason(svc, false, lg)
		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcConfigured, metav1.ConditionFalse, r, err.Error(), esr.clock, lg)
		return res, err
	}

	if err := esr.maybeProvision(ctx, svc, lg); err != nil {
		if strings.Contains(err.Error(), optimisticLockErrorMsg) {
			lg.Infof("optimistic lock error, retrying: %s", err)
		} else {
			return reconcile.Result{}, err
		}
	}

	return res, nil
}
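
// To make the above concrete, a rough sketch (assumed names and ports, not
// values taken from this file) of what a successful reconcile of the example
// Service above would leave behind in the operator's namespace:
//
//	ClusterIP Service "ts-db-<rand>": port 5432 -> targetPort in [10000 - 11000),
//	    plus the "tailscale-health-check" port.
//	EndpointSlice "ts-db-<rand>-ipv4": proxy Pod IPs, ports set to the container target ports.
//	ProxyGroup ConfigMap: an entry keyed "default-db" describing the tailnet target and portmappings.
//	The user's ExternalName Service: spec.externalName rewritten to
//	    "ts-db-<rand>.<operator-namespace>.svc.<cluster-domain>".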

func (esr *egressSvcsReconciler) maybeProvision(ctx context.Context, svc *corev1.Service, lg *zap.SugaredLogger) (err error) {
	r := svcConfiguredReason(svc, false, lg)
	st := metav1.ConditionFalse
	defer func() {
		msg := r
		if st != metav1.ConditionTrue && err != nil {
			msg = err.Error()
		}
		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcConfigured, st, r, msg, esr.clock, lg)
	}()

	crl := egressSvcChildResourceLabels(svc)
	clusterIPSvc, err := getSingleObject[corev1.Service](ctx, esr.Client, esr.tsNamespace, crl)
	if err != nil {
		err = fmt.Errorf("error retrieving ClusterIP Service: %w", err)
		return err
	}
	if clusterIPSvc == nil {
		clusterIPSvc = esr.clusterIPSvcForEgress(crl)
	}
	upToDate := svcConfigurationUpToDate(svc, lg)
	provisioned := true
	if !upToDate {
		if clusterIPSvc, provisioned, err = esr.provision(ctx, svc.Annotations[AnnotationProxyGroup], svc, clusterIPSvc, lg); err != nil {
			return err
		}
	}
	if !provisioned {
		lg.Infof("unable to provision cluster resources")
		return nil
	}

	// Update ExternalName Service to point at the ClusterIP Service.
	clusterDomain := retrieveClusterDomain(esr.tsNamespace, lg)
	clusterIPSvcFQDN := fmt.Sprintf("%s.%s.svc.%s", clusterIPSvc.Name, clusterIPSvc.Namespace, clusterDomain)
	if svc.Spec.ExternalName != clusterIPSvcFQDN {
		lg.Infof("Configuring ExternalName Service to point to ClusterIP Service %s", clusterIPSvcFQDN)
		svc.Spec.ExternalName = clusterIPSvcFQDN
		if err = esr.updateSvcSpec(ctx, svc); err != nil {
			err = fmt.Errorf("error updating ExternalName Service: %w", err)
			return err
		}
	}
	r = svcConfiguredReason(svc, true, lg)
	st = metav1.ConditionTrue
	return nil
}

func (esr *egressSvcsReconciler) provision(ctx context.Context, proxyGroupName string, svc, clusterIPSvc *corev1.Service, lg *zap.SugaredLogger) (*corev1.Service, bool, error) {
	lg.Infof("updating configuration...")
	usedPorts, err := esr.usedPortsForPG(ctx, proxyGroupName)
	if err != nil {
		return nil, false, fmt.Errorf("error calculating used ports for ProxyGroup %s: %w", proxyGroupName, err)
	}

	oldClusterIPSvc := clusterIPSvc.DeepCopy()
	// Loop over ClusterIP Service ports, remove any that are no longer needed.
	for i := len(clusterIPSvc.Spec.Ports) - 1; i >= 0; i-- {
		pm := clusterIPSvc.Spec.Ports[i]
		found := false
		for _, wantsPM := range svc.Spec.Ports {
			if wantsPM.Port == pm.Port && strings.EqualFold(string(wantsPM.Protocol), string(pm.Protocol)) {
				// We want to preserve the user-set port names for ease of debugging, but we also
				// need to name all unnamed ports, because the ClusterIP Service that we create will
				// always have at least two ports.
				// https://kubernetes.io/docs/concepts/services-networking/service/#multi-port-services
				// See also https://github.com/tailscale/tailscale/issues/13406#issuecomment-2507230388
				if wantsPM.Name != "" {
					clusterIPSvc.Spec.Ports[i].Name = wantsPM.Name
				} else {
					clusterIPSvc.Spec.Ports[i].Name = "tailscale-unnamed"
				}
				found = true
				break
			}
		}
		if !found {
			lg.Debugf("portmapping %s:%d -> %s:%d is no longer required, removing", pm.Protocol, pm.TargetPort.IntVal, pm.Protocol, pm.Port)
			clusterIPSvc.Spec.Ports = slices.Delete(clusterIPSvc.Spec.Ports, i, i+1)
		}
	}

	// Loop over ExternalName Service ports; for each one not found on the
	// ClusterIP Service, produce a new target port and add a portmapping to
	// the ClusterIP Service.
	for _, wantsPM := range svc.Spec.Ports {
		// Because we add a healthcheck port of our own, we will always have at least two ports. That
		// means that we cannot leave any port name unset.
		// https://kubernetes.io/docs/concepts/services-networking/service/#multi-port-services
		if wantsPM.Name == "" {
			wantsPM.Name = "tailscale-unnamed"
		}
		found := false
		for _, gotPM := range clusterIPSvc.Spec.Ports {
			if wantsPM.Port == gotPM.Port && strings.EqualFold(string(wantsPM.Protocol), string(gotPM.Protocol)) {
				found = true
				break
			}
		}
		if !found {
			// Calculate a free port to expose on the container and add
			// a new PortMap to the ClusterIP Service.
			if usedPorts.Len() >= maxPorts {
				// TODO(irbekrm): refactor to avoid extra reconciles here. Low priority as in practice,
				// the limit should not be hit.
				return nil, false, fmt.Errorf("unable to allocate additional ports on ProxyGroup %s, %d ports already used. Create another ProxyGroup or open an issue if you believe this is unexpected.", proxyGroupName, maxPorts)
			}
			p := unusedPort(usedPorts)
			lg.Debugf("mapping tailnet target port %d to container port %d", wantsPM.Port, p)
			usedPorts.Insert(p)
			clusterIPSvc.Spec.Ports = append(clusterIPSvc.Spec.Ports, corev1.ServicePort{
				Name:       wantsPM.Name,
				Protocol:   wantsPM.Protocol,
				Port:       wantsPM.Port,
				TargetPort: intstr.FromInt32(p),
			})
		}
	}
	var healthCheckPort int32 = defaultLocalAddrPort

	for {
		if !slices.ContainsFunc(svc.Spec.Ports, func(p corev1.ServicePort) bool {
			return p.Port == healthCheckPort
		}) {
			break
		}
		healthCheckPort++
		if healthCheckPort > 10002 {
			return nil, false, fmt.Errorf("unable to find a free port for internal health check in range [9002, 10002]")
		}
	}
	clusterIPSvc.Spec.Ports = append(clusterIPSvc.Spec.Ports, corev1.ServicePort{
		Name:       tsHealthCheckPortName,
		Port:       healthCheckPort,
		TargetPort: intstr.FromInt(defaultLocalAddrPort),
		Protocol:   "TCP",
	})
	if !reflect.DeepEqual(clusterIPSvc, oldClusterIPSvc) {
		if clusterIPSvc, err = createOrUpdate(ctx, esr.Client, esr.tsNamespace, clusterIPSvc, func(svc *corev1.Service) {
			svc.Labels = clusterIPSvc.Labels
			svc.Spec = clusterIPSvc.Spec
		}); err != nil {
			return nil, false, fmt.Errorf("error ensuring ClusterIP Service: %v", err)
		}
	}

	crl := egressSvcEpsLabels(svc, clusterIPSvc)
	// TODO(irbekrm): support IPv6, but need to investigate how kube proxy
	// sets up Service -> Pod routing when IPv6 is involved.
	eps := &discoveryv1.EndpointSlice{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("%s-ipv4", clusterIPSvc.Name),
			Namespace: esr.tsNamespace,
			Labels:    crl,
		},
		AddressType: discoveryv1.AddressTypeIPv4,
		Ports:       epsPortsFromSvc(clusterIPSvc),
	}
	if eps, err = createOrUpdate(ctx, esr.Client, esr.tsNamespace, eps, func(e *discoveryv1.EndpointSlice) {
		e.Labels = eps.Labels
		e.AddressType = eps.AddressType
		e.Ports = eps.Ports
		for _, p := range e.Endpoints {
			p.Conditions.Ready = nil
		}
	}); err != nil {
		return nil, false, fmt.Errorf("error ensuring EndpointSlice: %w", err)
	}

	cm, cfgs, err := egressSvcsConfigs(ctx, esr.Client, proxyGroupName, esr.tsNamespace)
	if err != nil {
		return nil, false, fmt.Errorf("error retrieving egress services configuration: %w", err)
	}
	if cm == nil {
		lg.Info("ConfigMap not yet created, waiting...")
		return nil, false, nil
	}
	tailnetSvc := tailnetSvcName(svc)
	gotCfg := (*cfgs)[tailnetSvc]
	wantsCfg := egressSvcCfg(svc, clusterIPSvc, esr.tsNamespace, lg)
	if !reflect.DeepEqual(gotCfg, wantsCfg) {
		lg.Debugf("updating egress services ConfigMap %s", cm.Name)
		mak.Set(cfgs, tailnetSvc, wantsCfg)
		bs, err := json.Marshal(cfgs)
		if err != nil {
			return nil, false, fmt.Errorf("error marshalling egress services configs: %w", err)
		}
		mak.Set(&cm.BinaryData, egressservices.KeyEgressServices, bs)
		if err := esr.Update(ctx, cm); err != nil {
			return nil, false, fmt.Errorf("error updating egress services ConfigMap: %w", err)
		}
	}
	lg.Infof("egress service configuration has been updated")
	return clusterIPSvc, true, nil
}

func (esr *egressSvcsReconciler) maybeCleanup(ctx context.Context, svc *corev1.Service, logger *zap.SugaredLogger) error {
	logger.Info("ensuring that resources created for egress service are deleted")

	// Delete egress service config from the ConfigMap mounted by the proxies.
	if err := esr.ensureEgressSvcCfgDeleted(ctx, svc, logger); err != nil {
		return fmt.Errorf("error deleting egress service config: %w", err)
	}

	// Delete the ClusterIP Service and EndpointSlice for the egress
	// service.
	types := []client.Object{
		&corev1.Service{},
		&discoveryv1.EndpointSlice{},
	}
	crl := egressSvcChildResourceLabels(svc)
	for _, typ := range types {
		if err := esr.DeleteAllOf(ctx, typ, client.InNamespace(esr.tsNamespace), client.MatchingLabels(crl)); err != nil {
			return fmt.Errorf("error deleting %s: %w", typ, err)
		}
	}

	ix := slices.Index(svc.Finalizers, FinalizerName)
	if ix != -1 {
		logger.Debug("Removing Tailscale finalizer from Service")
		svc.Finalizers = append(svc.Finalizers[:ix], svc.Finalizers[ix+1:]...)
		if err := esr.Update(ctx, svc); err != nil {
			return fmt.Errorf("failed to remove finalizer: %w", err)
		}
	}
	esr.mu.Lock()
	esr.svcs.Remove(svc.UID)
	gaugeEgressServices.Set(int64(esr.svcs.Len()))
	esr.mu.Unlock()
	logger.Info("successfully cleaned up resources for egress Service")
	return nil
}

func (esr *egressSvcsReconciler) maybeCleanupProxyGroupConfig(ctx context.Context, svc *corev1.Service, lg *zap.SugaredLogger) error {
	wantsProxyGroup := svc.Annotations[AnnotationProxyGroup]
	cond := tsoperator.GetServiceCondition(svc, tsapi.EgressSvcConfigured)
	if cond == nil {
		return nil
	}
	ss := strings.Split(cond.Reason, ":")
	if len(ss) < 3 {
		return nil
	}
	if strings.EqualFold(wantsProxyGroup, ss[2]) {
		return nil
	}
	esr.logger.Infof("egress Service configured on ProxyGroup %s, wants ProxyGroup %s, cleaning up...", ss[2], wantsProxyGroup)
	if err := esr.ensureEgressSvcCfgDeleted(ctx, svc, lg); err != nil {
		return fmt.Errorf("error deleting egress service config: %w", err)
	}
	return nil
}

// usedPortsForPG calculates the currently used match ports for ProxyGroup
// containers. It does that by retrieving all target ports of all
// ClusterIP Services created for egress services exposed on this ProxyGroup's
// proxies.
// TODO(irbekrm): this is currently good enough because we only have a single worker and
// because these Services are created by us, so we can always expect to get the
// latest ClusterIP Services via the controller cache. It will not work as well
// once we split into multiple workers; at that point we probably want to set
// used ports on the ProxyGroup's status.
func (esr *egressSvcsReconciler) usedPortsForPG(ctx context.Context, pg string) (sets.Set[int32], error) {
	svcList := &corev1.ServiceList{}
	if err := esr.List(ctx, svcList, client.InNamespace(esr.tsNamespace), client.MatchingLabels(map[string]string{labelProxyGroup: pg})); err != nil {
		return nil, fmt.Errorf("error listing Services: %w", err)
	}
	usedPorts := sets.New[int32]()
	for _, s := range svcList.Items {
		for _, p := range s.Spec.Ports {
			usedPorts.Insert(p.TargetPort.IntVal)
		}
	}
	return usedPorts, nil
}

// clusterIPSvcForEgress returns a template for the ClusterIP Service created
// for an egress service exposed on ProxyGroup proxies. The ClusterIP Service
// has no selector. Traffic sent to it will be routed to the endpoints defined
// by an EndpointSlice created for this egress service.
func (esr *egressSvcsReconciler) clusterIPSvcForEgress(crl map[string]string) *corev1.Service {
	return &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: svcNameBase(crl[LabelParentName]),
			Namespace:    esr.tsNamespace,
			Labels:       crl,
		},
		Spec: corev1.ServiceSpec{
			Type: corev1.ServiceTypeClusterIP,
		},
	}
}

func (esr *egressSvcsReconciler) ensureEgressSvcCfgDeleted(ctx context.Context, svc *corev1.Service, logger *zap.SugaredLogger) error {
	crl := egressSvcChildResourceLabels(svc)
	cmName := pgEgressCMName(crl[labelProxyGroup])
	cm := &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      cmName,
			Namespace: esr.tsNamespace,
		},
	}
	lggr := logger.With("ConfigMap", client.ObjectKeyFromObject(cm))
	lggr.Debug("ensuring that egress service configuration is removed from proxy config")
	if err := esr.Get(ctx, client.ObjectKeyFromObject(cm), cm); apierrors.IsNotFound(err) {
		lggr.Debugf("ConfigMap not found")
		return nil
	} else if err != nil {
		return fmt.Errorf("error retrieving ConfigMap: %w", err)
	}
	bs := cm.BinaryData[egressservices.KeyEgressServices]
	if len(bs) == 0 {
		lggr.Debugf("ConfigMap does not contain egress service configs")
		return nil
	}
	cfgs := &egressservices.Configs{}
	if err := json.Unmarshal(bs, cfgs); err != nil {
		return fmt.Errorf("error unmarshalling egress services configs: %w", err)
	}
	tailnetSvc := tailnetSvcName(svc)
	_, ok := (*cfgs)[tailnetSvc]
	if !ok {
		lggr.Debugf("ConfigMap does not contain egress service config, likely because it was already deleted")
		return nil
	}
	lggr.Infof("before deleting config %+#v", *cfgs)
	delete(*cfgs, tailnetSvc)
	lggr.Infof("after deleting config %+#v", *cfgs)
	bs, err := json.Marshal(cfgs)
	if err != nil {
		return fmt.Errorf("error marshalling egress services configs: %w", err)
	}
	mak.Set(&cm.BinaryData, egressservices.KeyEgressServices, bs)
	return esr.Update(ctx, cm)
}

func (esr *egressSvcsReconciler) validateClusterResources(ctx context.Context, svc *corev1.Service, lg *zap.SugaredLogger) (bool, error) {
	proxyGroupName := svc.Annotations[AnnotationProxyGroup]
	pg := &tsapi.ProxyGroup{
		ObjectMeta: metav1.ObjectMeta{
			Name: proxyGroupName,
		},
	}
	if err := esr.Get(ctx, client.ObjectKeyFromObject(pg), pg); apierrors.IsNotFound(err) {
		lg.Infof("ProxyGroup %q not found, waiting...", proxyGroupName)
		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcValid, metav1.ConditionUnknown, reasonProxyGroupNotReady, reasonProxyGroupNotReady, esr.clock, lg)
		tsoperator.RemoveServiceCondition(svc, tsapi.EgressSvcConfigured)
		return false, nil
	} else if err != nil {
		err := fmt.Errorf("unable to retrieve ProxyGroup %s: %w", proxyGroupName, err)
		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcValid, metav1.ConditionUnknown, reasonProxyGroupNotReady, err.Error(), esr.clock, lg)
		tsoperator.RemoveServiceCondition(svc, tsapi.EgressSvcConfigured)
		return false, err
	}
	if violations := validateEgressService(svc, pg); len(violations) > 0 {
		msg := fmt.Sprintf("invalid egress Service: %s", strings.Join(violations, ", "))
		esr.recorder.Event(svc, corev1.EventTypeWarning, "INVALIDSERVICE", msg)
		lg.Info(msg)
		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcValid, metav1.ConditionFalse, reasonEgressSvcInvalid, msg, esr.clock, lg)
		tsoperator.RemoveServiceCondition(svc, tsapi.EgressSvcConfigured)
		return false, nil
	}
	if !tsoperator.ProxyGroupAvailable(pg) {
		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcValid, metav1.ConditionUnknown, reasonProxyGroupNotReady, reasonProxyGroupNotReady, esr.clock, lg)
		tsoperator.RemoveServiceCondition(svc, tsapi.EgressSvcConfigured)
	}

	lg.Debugf("egress service is valid")
	tsoperator.SetServiceCondition(svc, tsapi.EgressSvcValid, metav1.ConditionTrue, reasonEgressSvcValid, reasonEgressSvcValid, esr.clock, lg)
	return true, nil
}

func egressSvcCfg(externalNameSvc, clusterIPSvc *corev1.Service, ns string, lg *zap.SugaredLogger) egressservices.Config {
	d := retrieveClusterDomain(ns, lg)
	tt := tailnetTargetFromSvc(externalNameSvc)
	hep := healthCheckForSvc(clusterIPSvc, d)
	cfg := egressservices.Config{
		TailnetTarget:       tt,
		HealthCheckEndpoint: hep,
	}
	for _, svcPort := range clusterIPSvc.Spec.Ports {
		if svcPort.Name == tsHealthCheckPortName {
			continue // exclude healthcheck from egress svcs configs
		}
		pm := portMap(svcPort)
		mak.Set(&cfg.Ports, pm, struct{}{})
	}
	return cfg
}
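
// Schematic sketch of the Config this produces for a ClusterIP Service with a
// single user port 5432 mapped to container port 10123 plus the health-check
// port. The port numbers, FQDNs and operator namespace below are illustrative
// assumptions, not values taken from this file:
//
//	egressservices.Config{
//		TailnetTarget:       egressservices.TailnetTarget{FQDN: "db.example.ts.net"},
//		HealthCheckEndpoint: "http://ts-db-abcde.tailscale.svc.cluster.local:9002/healthz",
//		Ports: {
//			{Protocol: "TCP", MatchPort: 10123, TargetPort: 5432}: {},
//		},
//	}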

func validateEgressService(svc *corev1.Service, pg *tsapi.ProxyGroup) []string {
	violations := validateService(svc)

	// We check that only one of these two is set in the earlier validateService function.
	if svc.Annotations[AnnotationTailnetTargetFQDN] == "" && svc.Annotations[AnnotationTailnetTargetIP] == "" {
		violations = append(violations, fmt.Sprintf("egress Service for ProxyGroup must have one of %s, %s annotations set", AnnotationTailnetTargetFQDN, AnnotationTailnetTargetIP))
	}
	if len(svc.Spec.Ports) == 0 {
		violations = append(violations, "egress Service for ProxyGroup must have at least one target Port specified")
	}
	if svc.Spec.Type != corev1.ServiceTypeExternalName {
		violations = append(violations, fmt.Sprintf("unexpected egress Service type %s. The only supported type is ExternalName.", svc.Spec.Type))
	}
	if pg.Spec.Type != tsapi.ProxyGroupTypeEgress {
		violations = append(violations, fmt.Sprintf("egress Service references ProxyGroup of type %s, must be type %s", pg.Spec.Type, tsapi.ProxyGroupTypeEgress))
	}
	return violations
}

// svcNameBase returns a name base that can be passed to
// ObjectMeta.GenerateName to generate a name for the ClusterIP Service.
// The generated name needs to be short enough so that it can later be used to
// generate a valid Kubernetes resource name for the EndpointSlice in form
// '<ClusterIP Service name>-ipv4|-ipv6'.
// A valid Kubernetes resource name must not be longer than 253 chars.
func svcNameBase(s string) string {
	// 5 chars for the "-ipv4"/"-ipv6" suffix appended to the EndpointSlice name.
	const maxClusterIPSvcNameLength = 253 - 5
	base := fmt.Sprintf("ts-%s-", s)
	generator := names.SimpleNameGenerator
	for {
		generatedName := generator.GenerateName(base)
		excess := len(generatedName) - maxClusterIPSvcNameLength
		if excess <= 0 {
			return base
		}
		base = base[:len(base)-1-excess] // cut off the excess chars
		base = base + "-"                // re-instate the dash
	}
}
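
// A minimal usage sketch (names are illustrative assumptions):
//
//	base := svcNameBase("db") // "ts-db-"
//	svc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{GenerateName: base}}
//	// After creation the API server fills in a name such as "ts-db-abcde",
//	// and the EndpointSlice for it is then named "ts-db-abcde-ipv4".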

// unusedPort returns a port in range [10000 - 11000). The caller must ensure that
// usedPorts does not contain all ports in range [10000 - 11000).
func unusedPort(usedPorts sets.Set[int32]) int32 {
	foundFreePort := false
	var suggestPort int32
	for !foundFreePort {
		suggestPort = rand.Int32N(maxPorts) + 10000
		if !usedPorts.Has(suggestPort) {
			foundFreePort = true
		}
	}
	return suggestPort
}

// tailnetTargetFromSvc returns a tailnet target for the given egress Service.
// Service must contain exactly one of tailscale.com/tailnet-ip,
// tailscale.com/tailnet-fqdn annotations.
func tailnetTargetFromSvc(svc *corev1.Service) egressservices.TailnetTarget {
	if fqdn := svc.Annotations[AnnotationTailnetTargetFQDN]; fqdn != "" {
		return egressservices.TailnetTarget{
			FQDN: fqdn,
		}
	}
	return egressservices.TailnetTarget{
		IP: svc.Annotations[AnnotationTailnetTargetIP],
	}
}

func portMap(p corev1.ServicePort) egressservices.PortMap {
	// TODO (irbekrm): out of bounds check?
	return egressservices.PortMap{
		Protocol:   string(p.Protocol),
		MatchPort:  uint16(p.TargetPort.IntVal),
		TargetPort: uint16(p.Port),
	}
}
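
// Note the deliberate swap above: the proxy matches on the randomly allocated
// container port and forwards to the user-defined port on the tailnet target.
// A small sketch with assumed port numbers:
//
//	sp := corev1.ServicePort{Protocol: "TCP", Port: 5432, TargetPort: intstr.FromInt32(10123)}
//	pm := portMap(sp)
//	// pm == egressservices.PortMap{Protocol: "TCP", MatchPort: 10123, TargetPort: 5432}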

func isEgressSvcForProxyGroup(obj client.Object) bool {
	s, ok := obj.(*corev1.Service)
	if !ok {
		return false
	}
	annots := s.ObjectMeta.Annotations
	return annots[AnnotationProxyGroup] != "" && (annots[AnnotationTailnetTargetFQDN] != "" || annots[AnnotationTailnetTargetIP] != "")
}

// egressSvcsConfigs returns the ConfigMap that contains egress services configuration for the provided ProxyGroup as well
// as the unmarshalled configuration from the ConfigMap.
func egressSvcsConfigs(ctx context.Context, cl client.Client, proxyGroupName, tsNamespace string) (cm *corev1.ConfigMap, cfgs *egressservices.Configs, err error) {
	name := pgEgressCMName(proxyGroupName)
	cm = &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: tsNamespace,
		},
	}
	err = cl.Get(ctx, client.ObjectKeyFromObject(cm), cm)
	if apierrors.IsNotFound(err) { // ProxyGroup resources have not been created (yet)
		return nil, nil, nil
	}
	if err != nil {
		return nil, nil, fmt.Errorf("error retrieving egress services ConfigMap %s: %v", name, err)
	}
	cfgs = &egressservices.Configs{}
	if len(cm.BinaryData[egressservices.KeyEgressServices]) != 0 {
		if err := json.Unmarshal(cm.BinaryData[egressservices.KeyEgressServices], cfgs); err != nil {
			return nil, nil, fmt.Errorf("error unmarshaling egress services config %v: %w", cm.BinaryData[egressservices.KeyEgressServices], err)
		}
	}
	return cm, cfgs, nil
}

// egressSvcChildResourceLabels returns labels that should be applied to the
// ClusterIP Service and the EndpointSlice created for the egress service.
// TODO(irbekrm): we currently set a bunch of labels based on Kubernetes
// resource names (ProxyGroup, Service). Maximum allowed label length is 63
// chars whilst the maximum allowed resource name length is 253 chars, so we
// should probably validate and truncate (?) the names if they are too long.
func egressSvcChildResourceLabels(svc *corev1.Service) map[string]string {
	return map[string]string{
		kubetypes.LabelManaged: "true",
		LabelParentType:        "svc",
		LabelParentName:        svc.Name,
		LabelParentNamespace:   svc.Namespace,
		labelProxyGroup:        svc.Annotations[AnnotationProxyGroup],
		labelSvcType:           typeEgress,
	}
}

// egressSvcEpsLabels returns labels to be added to an EndpointSlice created for an egress service.
func egressSvcEpsLabels(extNSvc, clusterIPSvc *corev1.Service) map[string]string {
	lbels := egressSvcChildResourceLabels(extNSvc)
	// Adding this label is what makes kube proxy set up rules to route traffic sent to the clusterIP Service to the
	// endpoints defined on this EndpointSlice.
	// https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#ownership
	lbels[discoveryv1.LabelServiceName] = clusterIPSvc.Name
	// Kubernetes recommends setting this label.
	// https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#management
	lbels[discoveryv1.LabelManagedBy] = "tailscale.com"
	return lbels
}

func svcConfigurationUpToDate(svc *corev1.Service, lg *zap.SugaredLogger) bool {
	cond := tsoperator.GetServiceCondition(svc, tsapi.EgressSvcConfigured)
	if cond == nil {
		return false
	}
	if cond.Status != metav1.ConditionTrue {
		return false
	}
	wantsReadyReason := svcConfiguredReason(svc, true, lg)
	return strings.EqualFold(wantsReadyReason, cond.Reason)
}

func cfgHash(c cfg, lg *zap.SugaredLogger) string {
	bs, err := json.Marshal(c)
	if err != nil {
		// Don't use l.Error as that messes up component logs with, in this case, an unnecessary stack trace.
		lg.Infof("error marshalling Config: %v", err)
		return ""
	}
	h := sha256.New()
	if _, err := h.Write(bs); err != nil {
		// Don't use l.Error as that messes up component logs with, in this case, an unnecessary stack trace.
		lg.Infof("error producing Config hash: %v", err)
		return ""
	}
	return fmt.Sprintf("%x", h.Sum(nil))
}

type cfg struct {
	Ports         []corev1.ServicePort         `json:"ports"`
	TailnetTarget egressservices.TailnetTarget `json:"tailnetTarget"`
	ProxyGroup    string                       `json:"proxyGroup"`
}

func svcConfiguredReason(svc *corev1.Service, configured bool, lg *zap.SugaredLogger) string {
	var r string
	if configured {
		r = "ConfiguredFor:"
	} else {
		r = fmt.Sprintf("ConfigurationFailed:%s", r)
	}
	r += fmt.Sprintf("ProxyGroup:%s", svc.Annotations[AnnotationProxyGroup])
	tt := tailnetTargetFromSvc(svc)
	s := cfg{
		Ports:         svc.Spec.Ports,
		TailnetTarget: tt,
		ProxyGroup:    svc.Annotations[AnnotationProxyGroup],
	}
	r += fmt.Sprintf(":Config:%s", cfgHash(s, lg))
	return r
}
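
// The resulting condition reason therefore has the shape (the hash value
// below is an illustrative placeholder):
//
//	"ConfiguredFor:ProxyGroup:egress-proxies:Config:3b5c..."
//
// maybeCleanupProxyGroupConfig relies on this shape: splitting the reason on
// ":" yields the ProxyGroup name at index 2, which it compares against the
// Service's current AnnotationProxyGroup annotation.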

// tailnetSvcName accepts an ExternalName Service and returns a name that will be used to distinguish this tailnet
// service from other tailnet services exposed to cluster workloads.
func tailnetSvcName(extNSvc *corev1.Service) string {
	return fmt.Sprintf("%s-%s", extNSvc.Namespace, extNSvc.Name)
}

// epsPortsFromSvc takes the ClusterIP Service created for an egress service and
// returns its Port array in a form that can be used for an EndpointSlice.
func epsPortsFromSvc(svc *corev1.Service) (ep []discoveryv1.EndpointPort) {
	for _, p := range svc.Spec.Ports {
		ep = append(ep, discoveryv1.EndpointPort{
			Protocol: &p.Protocol,
			Port:     &p.TargetPort.IntVal,
			Name:     &p.Name,
		})
	}
	return ep
}

// updateSvcSpec ensures that the given Service's spec is updated in cluster, but the local Service object still retains
// the not-yet-applied status.
// TODO(irbekrm): once we do SSA for these patch updates, this will no longer be needed.
func (esr *egressSvcsReconciler) updateSvcSpec(ctx context.Context, svc *corev1.Service) error {
	st := svc.Status.DeepCopy()
	err := esr.Update(ctx, svc)
	svc.Status = *st
	return err
}

// healthCheckForSvc returns the URL of containerboot's health check endpoint served by this Service, or an empty string.
func healthCheckForSvc(svc *corev1.Service, clusterDomain string) string {
	// This version of the operator always sets the health check port on the egress Services. However, it is possible
	// that this reconcile loop runs during a proxy upgrade from a version that did not set the health check port
	// and parses a Service that does not have the port set yet.
	i := slices.IndexFunc(svc.Spec.Ports, func(port corev1.ServicePort) bool {
		return port.Name == tsHealthCheckPortName
	})
	if i == -1 {
		return ""
	}
	return fmt.Sprintf("http://%s.%s.svc.%s:%d/healthz", svc.Name, svc.Namespace, clusterDomain, svc.Spec.Ports[i].Port)
}
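
// For instance, for a ClusterIP Service "ts-db-abcde" in an operator namespace
// "tailscale" with cluster domain "cluster.local" and health-check port 9002
// (all assumed values), this returns:
//
//	"http://ts-db-abcde.tailscale.svc.cluster.local:9002/healthz"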