mirror of
https://github.com/tailscale/tailscale.git
synced 2026-05-05 20:26:47 +02:00
This file was never truly necessary and has never actually been used in the history of Tailscale's open source releases. A Brief History of AUTHORS files --- The AUTHORS file was a pattern developed at Google, originally for Chromium, then adopted by Go and a bunch of other projects. The problem was that Chromium originally had a copyright line only recognizing Google as the copyright holder. Because Google (and most open source projects) do not require copyright assignment for contributions, each contributor maintains their copyright. Some large corporate contributors then tried to add their own name to the copyright line in the LICENSE file or in file headers. This quickly becomes unwieldy, and puts a tremendous burden on anyone building on top of Chromium, since the license requires that they keep all copyright lines intact. The compromise was to create an AUTHORS file that would list all of the copyright holders. The LICENSE file and source file headers would then include that list by reference, listing the copyright holder as "The Chromium Authors". It also became cumbersome to simply keep the file up to date with a high rate of new contributors. Plus it's not always obvious who the copyright holder is. Sometimes it is the individual making the contribution, but many times it may be their employer. There is no way for the project maintainer to know. Eventually, Google changed their policy to no longer recommend trying to keep the AUTHORS file up to date proactively, and instead to only add to it when requested: https://opensource.google/docs/releasing/authors. They are also clear that: > Adding contributors to the AUTHORS file is entirely within the > project's discretion and has no implications for copyright ownership. It was primarily added to appease a small number of large contributors that insisted that they be recognized as copyright holders (which was entirely their right to do).
But it's not truly necessary, and not even the most accurate way of identifying contributors and/or copyright holders. In practice, we've never added anyone to our AUTHORS file. It only lists Tailscale, so it's not really serving any purpose. It also causes confusion because Tailscalars put the "Tailscale Inc & AUTHORS" header in other open source repos which don't actually have an AUTHORS file, so it's ambiguous what that means. Instead, we just acknowledge that the contributors to Tailscale (whoever they are) are copyright holders for their individual contributions. We also have the benefit of using the DCO (developercertificate.org) which provides some additional certification of their right to make the contribution. The source file changes were purely mechanical with: git ls-files | xargs sed -i -e 's/\(Tailscale Inc &\) AUTHORS/\1 contributors/g' Updates #cleanup Change-Id: Ia101a4a3005adb9118051b3416f5a64a4a45987d Signed-off-by: Will Norris <will@tailscale.com>
275 lines
11 KiB
Go
275 lines
11 KiB
Go
// Copyright (c) Tailscale Inc & contributors
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
//go:build !plan9
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"slices"
|
|
"strings"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"go.uber.org/zap"
|
|
xslices "golang.org/x/exp/slices"
|
|
corev1 "k8s.io/api/core/v1"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
|
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
|
"tailscale.com/kube/kubetypes"
|
|
"tailscale.com/tstime"
|
|
"tailscale.com/util/backoff"
|
|
"tailscale.com/util/httpm"
|
|
)
|
|
|
|
// tsEgressReadinessGate is the Pod condition type used for the egress readiness gate. The reconciler in this file
// only acts on Pods that list it in their readiness gates, and adds a condition of this type to the Pod's status once
// the Pod has been verified as routable for all egress services.
const tsEgressReadinessGate = "tailscale.com/egress-services"
|
|
|
|
// egressPodsReconciler is responsible for setting tailscale.com/egress-services condition on egress ProxyGroup Pods.
|
|
// The condition is used as a readiness gate for the Pod, meaning that kubelet will not mark the Pod as ready before the
|
|
// condition is set. The ProxyGroup StatefulSet updates are rolled out in such a way that no Pod is restarted, before
|
|
// the previous Pod is marked as ready, so ensuring that the Pod does not get marked as ready when it is not yet able to
|
|
// route traffic for egress service prevents downtime during restarts caused by no available endpoints left because
|
|
// every Pod has been recreated and is not yet added to endpoints.
|
|
// https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-readiness-gate
|
|
type egressPodsReconciler struct {
|
|
client.Client
|
|
logger *zap.SugaredLogger
|
|
tsNamespace string
|
|
clock tstime.Clock
|
|
httpClient doer // http client that can be set to a mock client in tests
|
|
maxBackoff time.Duration // max backoff period between health check calls
|
|
}
|
|
|
|
// Reconcile reconciles an egress ProxyGroup Pods on changes to those Pods and ProxyGroup EndpointSlices. It ensures
|
|
// that for each Pod who is ready to route traffic to all egress services for the ProxyGroup, the Pod has a
|
|
// tailscale.com/egress-services condition to set, so that kubelet will mark the Pod as ready.
|
|
//
|
|
// For the Pod to be ready
|
|
// to route traffic to the egress service, the kube proxy needs to have set up the Pod's IP as an endpoint for the
|
|
// ClusterIP Service corresponding to the egress service.
|
|
//
|
|
// Note that the endpoints for the ClusterIP Service are configured by the operator itself using custom
|
|
// EndpointSlices(egress-eps-reconciler), so the routing is not blocked on Pod's readiness.
|
|
//
|
|
// Each egress service has a corresponding ClusterIP Service, that exposes all user configured
|
|
// tailnet ports, as well as a health check port for the proxy.
|
|
//
|
|
// The reconciler calls the health check endpoint of each Service up to N number of times, where N is the number of
|
|
// replicas for the ProxyGroup x 3, and checks if the received response is healthy response from the Pod being reconciled.
|
|
//
|
|
// The health check response contains a header with the
|
|
// Pod's IP address- this is used to determine whether the response is received from this Pod.
|
|
//
|
|
// If the Pod does not appear to be serving the health check endpoint (pre-v1.80 proxies), the reconciler just sets the
|
|
// readiness condition for backwards compatibility reasons.
|
|
func (er *egressPodsReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
|
|
lg := er.logger.With("Pod", req.NamespacedName)
|
|
lg.Debugf("starting reconcile")
|
|
defer lg.Debugf("reconcile finished")
|
|
|
|
pod := new(corev1.Pod)
|
|
err = er.Get(ctx, req.NamespacedName, pod)
|
|
if apierrors.IsNotFound(err) {
|
|
return reconcile.Result{}, nil
|
|
}
|
|
if err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("failed to get Pod: %w", err)
|
|
}
|
|
if !pod.DeletionTimestamp.IsZero() {
|
|
lg.Debugf("Pod is being deleted, do nothing")
|
|
return res, nil
|
|
}
|
|
if pod.Labels[LabelParentType] != proxyTypeProxyGroup {
|
|
lg.Infof("[unexpected] reconciler called for a Pod that is not a ProxyGroup Pod")
|
|
return res, nil
|
|
}
|
|
|
|
// If the Pod does not have the readiness gate set, there is no need to add the readiness condition. In practice
|
|
// this will happen if the user has configured custom TS_LOCAL_ADDR_PORT, thus disabling the graceful failover.
|
|
if !slices.ContainsFunc(pod.Spec.ReadinessGates, func(r corev1.PodReadinessGate) bool {
|
|
return r.ConditionType == tsEgressReadinessGate
|
|
}) {
|
|
lg.Debug("Pod does not have egress readiness gate set, skipping")
|
|
return res, nil
|
|
}
|
|
|
|
proxyGroupName := pod.Labels[LabelParentName]
|
|
pg := new(tsapi.ProxyGroup)
|
|
if err := er.Get(ctx, types.NamespacedName{Name: proxyGroupName}, pg); err != nil {
|
|
return res, fmt.Errorf("error getting ProxyGroup %q: %w", proxyGroupName, err)
|
|
}
|
|
if pg.Spec.Type != typeEgress {
|
|
lg.Infof("[unexpected] reconciler called for %q ProxyGroup Pod", pg.Spec.Type)
|
|
return res, nil
|
|
}
|
|
// Get all ClusterIP Services for all egress targets exposed to cluster via this ProxyGroup.
|
|
lbls := map[string]string{
|
|
kubetypes.LabelManaged: "true",
|
|
labelProxyGroup: proxyGroupName,
|
|
labelSvcType: typeEgress,
|
|
}
|
|
svcs := &corev1.ServiceList{}
|
|
if err := er.List(ctx, svcs, client.InNamespace(er.tsNamespace), client.MatchingLabels(lbls)); err != nil {
|
|
return res, fmt.Errorf("error listing ClusterIP Services")
|
|
}
|
|
|
|
idx := xslices.IndexFunc(pod.Status.Conditions, func(c corev1.PodCondition) bool {
|
|
return c.Type == tsEgressReadinessGate
|
|
})
|
|
if idx != -1 {
|
|
lg.Debugf("Pod is already ready, do nothing")
|
|
return res, nil
|
|
}
|
|
|
|
var routesMissing atomic.Bool
|
|
errChan := make(chan error, len(svcs.Items))
|
|
for _, svc := range svcs.Items {
|
|
s := svc
|
|
go func() {
|
|
ll := lg.With("service_name", s.Name)
|
|
d := retrieveClusterDomain(er.tsNamespace, ll)
|
|
healthCheckAddr := healthCheckForSvc(&s, d)
|
|
if healthCheckAddr == "" {
|
|
ll.Debugf("ClusterIP Service does not expose a health check endpoint, unable to verify if routing is set up")
|
|
errChan <- nil
|
|
return
|
|
}
|
|
|
|
var routesSetup bool
|
|
bo := backoff.NewBackoff(s.Name, ll.Infof, er.maxBackoff)
|
|
for range numCalls(pgReplicas(pg)) {
|
|
if ctx.Err() != nil {
|
|
errChan <- nil
|
|
return
|
|
}
|
|
state, err := er.lookupPodRouteViaSvc(ctx, pod, healthCheckAddr, ll)
|
|
if err != nil {
|
|
errChan <- fmt.Errorf("error validating if routing has been set up for Pod: %w", err)
|
|
return
|
|
}
|
|
if state == healthy || state == cannotVerify {
|
|
routesSetup = true
|
|
break
|
|
}
|
|
if state == unreachable || state == unhealthy || state == podNotReady {
|
|
bo.BackOff(ctx, errors.New("backoff"))
|
|
}
|
|
}
|
|
if !routesSetup {
|
|
ll.Debugf("Pod is not yet configured as Service endpoint")
|
|
routesMissing.Store(true)
|
|
}
|
|
errChan <- nil
|
|
}()
|
|
}
|
|
for range len(svcs.Items) {
|
|
e := <-errChan
|
|
err = errors.Join(err, e)
|
|
}
|
|
if err != nil {
|
|
return res, fmt.Errorf("error verifying connectivity: %w", err)
|
|
}
|
|
if rm := routesMissing.Load(); rm {
|
|
lg.Info("Pod is not yet added as an endpoint for all egress targets, waiting...")
|
|
return reconcile.Result{RequeueAfter: shortRequeue}, nil
|
|
}
|
|
if err := er.setPodReady(ctx, pod, lg); err != nil {
|
|
return res, fmt.Errorf("error setting Pod as ready: %w", err)
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
func (er *egressPodsReconciler) setPodReady(ctx context.Context, pod *corev1.Pod, lg *zap.SugaredLogger) error {
|
|
if slices.ContainsFunc(pod.Status.Conditions, func(c corev1.PodCondition) bool {
|
|
return c.Type == tsEgressReadinessGate
|
|
}) {
|
|
return nil
|
|
}
|
|
lg.Infof("Pod is ready to route traffic to all egress targets")
|
|
pod.Status.Conditions = append(pod.Status.Conditions, corev1.PodCondition{
|
|
Type: tsEgressReadinessGate,
|
|
Status: corev1.ConditionTrue,
|
|
LastTransitionTime: metav1.Time{Time: er.clock.Now()},
|
|
})
|
|
return er.Status().Update(ctx, pod)
|
|
}
|
|
|
|
// healthCheckState describes the outcome of one request to an egress Service health check endpoint that was made in
// an attempt to reach a specific backend Pod.
type healthCheckState int8

const (
	// cannotVerify: not verifiable for this setup (i.e earlier proxy version).
	cannotVerify healthCheckState = iota
	// unreachable: no backends or another network error.
	unreachable
	// notFound: the request hit another backend.
	notFound
	// unhealthy: the response status was not 200.
	unhealthy
	// podNotReady: the Pod is not ready, i.e does not have an IP address yet.
	podNotReady
	// healthy: the response status was 200.
	healthy
)
|
|
|
|
// lookupPodRouteViaSvc attempts to reach a Pod using a health check endpoint served by a Service and returns the state of the health check.
|
|
func (er *egressPodsReconciler) lookupPodRouteViaSvc(ctx context.Context, pod *corev1.Pod, healthCheckAddr string, lg *zap.SugaredLogger) (healthCheckState, error) {
|
|
if !slices.ContainsFunc(pod.Spec.Containers[0].Env, func(e corev1.EnvVar) bool {
|
|
return e.Name == "TS_ENABLE_HEALTH_CHECK" && e.Value == "true"
|
|
}) {
|
|
lg.Debugf("Pod does not have health check enabled, unable to verify if it is currently routable via Service")
|
|
return cannotVerify, nil
|
|
}
|
|
wantsIP, err := podIPv4(pod)
|
|
if err != nil {
|
|
return -1, fmt.Errorf("error determining Pod's IP address: %w", err)
|
|
}
|
|
if wantsIP == "" {
|
|
return podNotReady, nil
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(ctx, time.Second*3)
|
|
defer cancel()
|
|
req, err := http.NewRequestWithContext(ctx, httpm.GET, healthCheckAddr, nil)
|
|
if err != nil {
|
|
return -1, fmt.Errorf("error creating new HTTP request: %w", err)
|
|
}
|
|
// Do not re-use the same connection for the next request so to maximize the chance of hitting all backends equally.
|
|
req.Close = true
|
|
resp, err := er.httpClient.Do(req)
|
|
if err != nil {
|
|
// This is most likely because this is the first Pod and is not yet added to service endpoints. Other
|
|
// error types are possible, but checking for those would likely make the system too fragile.
|
|
return unreachable, nil
|
|
}
|
|
defer resp.Body.Close()
|
|
gotIP := resp.Header.Get(kubetypes.PodIPv4Header)
|
|
if gotIP == "" {
|
|
lg.Debugf("Health check does not return Pod's IP header, unable to verify if Pod is currently routable via Service")
|
|
return cannotVerify, nil
|
|
}
|
|
if !strings.EqualFold(wantsIP, gotIP) {
|
|
return notFound, nil
|
|
}
|
|
if resp.StatusCode != http.StatusOK {
|
|
return unhealthy, nil
|
|
}
|
|
return healthy, nil
|
|
}
|
|
|
|
// numCalls returns how many times an endpoint on a ProxyGroup Service should be called before it can be safely
// assumed that, if none of the responses came back from a specific Pod, traffic for the Service is currently not
// being routed to that Pod. The result is three calls per replica. This assumes that traffic for the Service is
// routed via round robin, so InternalTrafficPolicy must be 'Cluster' and session affinity must be None.
func numCalls(replicas int32) int32 {
	const callsPerReplica = 3
	return callsPerReplica * replicas
}
|
|
|
|
// doer abstracts the HTTP client used for health check requests so that a mock client can be substituted in tests.
type doer interface {
	// Do sends req and returns the response or an error.
	Do(req *http.Request) (*http.Response, error)
}
|