mirror of
https://github.com/tailscale/tailscale.git
synced 2026-05-06 12:46:20 +02:00
* cmd/k8s-operator: truncate long label values in metrics resources Kubernetes label values have a 63-character limit, but resource names can be up to 253 characters. When a Service or Ingress with a long name is exposed via Tailscale, the operator fails to reconcile because it uses the parent resource name directly as label values on metrics Services. Truncate label values that may exceed the limit by keeping the first 54 characters and appending a SHA256-based hash suffix to preserve uniqueness. Fixes #18894 Signed-off-by: Daniel Pañeda <daniel.paneda@clickhouse.com> Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk> * cmd/k8s-operator: move TruncateLabelValue to shared k8s-operator package Move the label truncation helper to k8s-operator/utils.go so it can be reused by other components that need to produce valid Kubernetes labels. Signed-off-by: Daniel Pañeda <daniel.paneda@clickhouse.com> Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk> * cmd/k8s-operator: truncate long domain label values in cert resources Applies TruncateLabelValue to certResourceLabels in order to prevent API server validation failures. This covers both the HA Ingress and kube-apiserver proxy reconcilers, as both flow through certResourceLabels. Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk> * cmd/k8s-operator: remove empty metrics_resources_test.go, use hyphens in test names to satisfy go vet Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk> --------- Signed-off-by: Daniel Pañeda <daniel.paneda@clickhouse.com> Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk> Co-authored-by: chaosinthecrd <tom@tmlabs.co.uk>
295 lines
12 KiB
Go
295 lines
12 KiB
Go
// Copyright (c) Tailscale Inc & contributors
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
//go:build !plan9
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"maps"
|
|
"reflect"
|
|
|
|
"go.uber.org/zap"
|
|
corev1 "k8s.io/api/core/v1"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
kube "tailscale.com/k8s-operator"
|
|
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
|
"tailscale.com/kube/kubetypes"
|
|
)
|
|
|
|
const (
|
|
labelMetricsTarget = "tailscale.com/metrics-target"
|
|
|
|
// These labels get transferred from the metrics Service to the ingested Prometheus metrics.
|
|
labelPromProxyType = "ts_proxy_type"
|
|
labelPromProxyParentName = "ts_proxy_parent_name"
|
|
labelPromProxyParentNamespace = "ts_proxy_parent_namespace"
|
|
labelPromJob = "ts_prom_job"
|
|
|
|
serviceMonitorCRD = "servicemonitors.monitoring.coreos.com"
|
|
)
|
|
|
|
// ServiceMonitor contains a subset of fields of servicemonitors.monitoring.coreos.com Custom Resource Definition.
|
|
// Duplicating it here allows us to avoid importing prometheus-operator library.
|
|
// https://github.com/prometheus-operator/prometheus-operator/blob/bb4514e0d5d69f20270e29cfd4ad39b87865ccdf/pkg/apis/monitoring/v1/servicemonitor_types.go#L40
|
|
type ServiceMonitor struct {
|
|
metav1.TypeMeta `json:",inline"`
|
|
metav1.ObjectMeta `json:"metadata"`
|
|
Spec ServiceMonitorSpec `json:"spec"`
|
|
}
|
|
|
|
// https://github.com/prometheus-operator/prometheus-operator/blob/bb4514e0d5d69f20270e29cfd4ad39b87865ccdf/pkg/apis/monitoring/v1/servicemonitor_types.go#L55
|
|
type ServiceMonitorSpec struct {
|
|
// Endpoints defines the endpoints to be scraped on the selected Service(s).
|
|
// https://github.com/prometheus-operator/prometheus-operator/blob/bb4514e0d5d69f20270e29cfd4ad39b87865ccdf/pkg/apis/monitoring/v1/servicemonitor_types.go#L82
|
|
Endpoints []ServiceMonitorEndpoint `json:"endpoints"`
|
|
// JobLabel is the label on the Service whose value will become the value of the Prometheus job label for the metrics ingested via this ServiceMonitor.
|
|
// https://github.com/prometheus-operator/prometheus-operator/blob/bb4514e0d5d69f20270e29cfd4ad39b87865ccdf/pkg/apis/monitoring/v1/servicemonitor_types.go#L66
|
|
JobLabel string `json:"jobLabel"`
|
|
// NamespaceSelector selects the namespace of Service(s) that this ServiceMonitor allows to scrape.
|
|
// https://github.com/prometheus-operator/prometheus-operator/blob/bb4514e0d5d69f20270e29cfd4ad39b87865ccdf/pkg/apis/monitoring/v1/servicemonitor_types.go#L88
|
|
NamespaceSelector ServiceMonitorNamespaceSelector `json:"namespaceSelector,omitempty"`
|
|
// Selector is the label selector for Service(s) that this ServiceMonitor allows to scrape.
|
|
// https://github.com/prometheus-operator/prometheus-operator/blob/bb4514e0d5d69f20270e29cfd4ad39b87865ccdf/pkg/apis/monitoring/v1/servicemonitor_types.go#L85
|
|
Selector metav1.LabelSelector `json:"selector"`
|
|
// TargetLabels are labels on the selected Service that should be applied as Prometheus labels to the ingested metrics.
|
|
// https://github.com/prometheus-operator/prometheus-operator/blob/bb4514e0d5d69f20270e29cfd4ad39b87865ccdf/pkg/apis/monitoring/v1/servicemonitor_types.go#L72
|
|
TargetLabels []string `json:"targetLabels"`
|
|
}
|
|
|
|
// ServiceMonitorNamespaceSelector selects namespaces in which Prometheus operator will attempt to find Services for
|
|
// this ServiceMonitor.
|
|
// https://github.com/prometheus-operator/prometheus-operator/blob/bb4514e0d5d69f20270e29cfd4ad39b87865ccdf/pkg/apis/monitoring/v1/servicemonitor_types.go#L88
|
|
type ServiceMonitorNamespaceSelector struct {
|
|
MatchNames []string `json:"matchNames,omitempty"`
|
|
}
|
|
|
|
// ServiceMonitorEndpoint defines an endpoint of Service to scrape. We only define port here. Prometheus by default
|
|
// scrapes /metrics path, which is what we want.
|
|
type ServiceMonitorEndpoint struct {
|
|
// Port is the name of the Service port that Prometheus will scrape.
|
|
Port string `json:"port,omitempty"`
|
|
}
|
|
|
|
func reconcileMetricsResources(ctx context.Context, logger *zap.SugaredLogger, opts *metricsOpts, pc *tsapi.ProxyClass, cl client.Client) error {
|
|
if opts.proxyType == proxyTypeEgress {
|
|
// Metrics are currently not being enabled for standalone egress proxies.
|
|
return nil
|
|
}
|
|
if pc == nil || pc.Spec.Metrics == nil || !pc.Spec.Metrics.Enable {
|
|
return maybeCleanupMetricsResources(ctx, opts, cl)
|
|
}
|
|
metricsSvc := &corev1.Service{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: metricsResourceName(opts.proxyStsName),
|
|
Namespace: opts.tsNamespace,
|
|
Labels: metricsResourceLabels(opts),
|
|
},
|
|
Spec: corev1.ServiceSpec{
|
|
Selector: opts.proxyLabels,
|
|
Type: corev1.ServiceTypeClusterIP,
|
|
Ports: []corev1.ServicePort{{Protocol: "TCP", Port: 9002, Name: "metrics"}},
|
|
},
|
|
}
|
|
var err error
|
|
metricsSvc, err = createOrUpdate(ctx, cl, opts.tsNamespace, metricsSvc, func(svc *corev1.Service) {
|
|
svc.Spec.Ports = metricsSvc.Spec.Ports
|
|
svc.Spec.Selector = metricsSvc.Spec.Selector
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("error ensuring metrics Service: %w", err)
|
|
}
|
|
|
|
crdExists, err := hasServiceMonitorCRD(ctx, cl)
|
|
if err != nil {
|
|
return fmt.Errorf("error verifying that %q CRD exists: %w", serviceMonitorCRD, err)
|
|
}
|
|
if !crdExists {
|
|
return nil
|
|
}
|
|
|
|
if pc.Spec.Metrics.ServiceMonitor == nil || !pc.Spec.Metrics.ServiceMonitor.Enable {
|
|
return maybeCleanupServiceMonitor(ctx, cl, opts.proxyStsName, opts.tsNamespace)
|
|
}
|
|
|
|
logger.Infof("ensuring ServiceMonitor for metrics Service %s/%s", metricsSvc.Namespace, metricsSvc.Name)
|
|
svcMonitor, err := newServiceMonitor(metricsSvc, pc.Spec.Metrics.ServiceMonitor)
|
|
if err != nil {
|
|
return fmt.Errorf("error creating ServiceMonitor: %w", err)
|
|
}
|
|
|
|
// We don't use createOrUpdate here because that does not work with unstructured types.
|
|
existing := svcMonitor.DeepCopy()
|
|
err = cl.Get(ctx, client.ObjectKeyFromObject(metricsSvc), existing)
|
|
if apierrors.IsNotFound(err) {
|
|
if err := cl.Create(ctx, svcMonitor); err != nil {
|
|
return fmt.Errorf("error creating ServiceMonitor: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("error getting ServiceMonitor: %w", err)
|
|
}
|
|
// Currently, we only update labels on the ServiceMonitor as those are the only values that can change.
|
|
if !reflect.DeepEqual(existing.GetLabels(), svcMonitor.GetLabels()) {
|
|
existing.SetLabels(svcMonitor.GetLabels())
|
|
if err := cl.Update(ctx, existing); err != nil {
|
|
return fmt.Errorf("error updating ServiceMonitor: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// maybeCleanupMetricsResources ensures that any metrics resources created for a proxy are deleted. Only metrics Service
|
|
// gets deleted explicitly because the ServiceMonitor has Service's owner reference, so gets garbage collected
|
|
// automatically.
|
|
func maybeCleanupMetricsResources(ctx context.Context, opts *metricsOpts, cl client.Client) error {
|
|
sel := metricsSvcSelector(opts.proxyLabels, opts.proxyType)
|
|
return cl.DeleteAllOf(ctx, &corev1.Service{}, client.InNamespace(opts.tsNamespace), client.MatchingLabels(sel))
|
|
}
|
|
|
|
// maybeCleanupServiceMonitor cleans up any ServiceMonitor created for the named proxy StatefulSet.
|
|
func maybeCleanupServiceMonitor(ctx context.Context, cl client.Client, stsName, ns string) error {
|
|
smName := metricsResourceName(stsName)
|
|
sm := serviceMonitorTemplate(smName, ns)
|
|
u, err := serviceMonitorToUnstructured(sm)
|
|
if err != nil {
|
|
return fmt.Errorf("error building ServiceMonitor: %w", err)
|
|
}
|
|
err = cl.Get(ctx, types.NamespacedName{Name: smName, Namespace: ns}, u)
|
|
if apierrors.IsNotFound(err) {
|
|
return nil // nothing to do
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("error verifying if ServiceMonitor %s/%s exists: %w", ns, stsName, err)
|
|
}
|
|
return cl.Delete(ctx, u)
|
|
}
|
|
|
|
// newServiceMonitor takes a metrics Service created for a proxy and constructs and returns a ServiceMonitor for that
|
|
// proxy that can be applied to the kube API server.
|
|
// The ServiceMonitor is returned as Unstructured type - this allows us to avoid importing prometheus-operator API server client/schema.
|
|
func newServiceMonitor(metricsSvc *corev1.Service, spec *tsapi.ServiceMonitor) (*unstructured.Unstructured, error) {
|
|
sm := serviceMonitorTemplate(metricsSvc.Name, metricsSvc.Namespace)
|
|
sm.ObjectMeta.Labels = metricsSvc.Labels
|
|
if spec != nil && len(spec.Labels) > 0 {
|
|
sm.ObjectMeta.Labels = mergeMapKeys(sm.ObjectMeta.Labels, spec.Labels.Parse())
|
|
}
|
|
|
|
sm.ObjectMeta.OwnerReferences = []metav1.OwnerReference{*metav1.NewControllerRef(metricsSvc, corev1.SchemeGroupVersion.WithKind("Service"))}
|
|
sm.Spec = ServiceMonitorSpec{
|
|
Selector: metav1.LabelSelector{MatchLabels: metricsSvc.Labels},
|
|
Endpoints: []ServiceMonitorEndpoint{{
|
|
Port: "metrics",
|
|
}},
|
|
NamespaceSelector: ServiceMonitorNamespaceSelector{
|
|
MatchNames: []string{metricsSvc.Namespace},
|
|
},
|
|
JobLabel: labelPromJob,
|
|
TargetLabels: []string{
|
|
labelPromProxyParentName,
|
|
labelPromProxyParentNamespace,
|
|
labelPromProxyType,
|
|
},
|
|
}
|
|
return serviceMonitorToUnstructured(sm)
|
|
}
|
|
|
|
// serviceMonitorToUnstructured takes a ServiceMonitor and converts it to Unstructured type that can be used by the c/r
|
|
// client in Kubernetes API server calls.
|
|
func serviceMonitorToUnstructured(sm *ServiceMonitor) (*unstructured.Unstructured, error) {
|
|
contents, err := runtime.DefaultUnstructuredConverter.ToUnstructured(sm)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error converting ServiceMonitor to Unstructured: %w", err)
|
|
}
|
|
u := &unstructured.Unstructured{}
|
|
u.SetUnstructuredContent(contents)
|
|
u.SetGroupVersionKind(sm.GroupVersionKind())
|
|
return u, nil
|
|
}
|
|
|
|
// metricsResourceName returns name for metrics Service and ServiceMonitor for a proxy StatefulSet.
|
|
func metricsResourceName(stsName string) string {
|
|
// Maximum length of StatefulSet name if 52 chars, so this is fine.
|
|
return fmt.Sprintf("%s-metrics", stsName)
|
|
}
|
|
|
|
// metricsResourceLabels constructs labels that will be applied to metrics Service and metrics ServiceMonitor for a
|
|
// proxy.
|
|
func metricsResourceLabels(opts *metricsOpts) map[string]string {
|
|
lbls := map[string]string{
|
|
kubetypes.LabelManaged: "true",
|
|
labelMetricsTarget: opts.proxyStsName,
|
|
labelPromProxyType: opts.proxyType,
|
|
labelPromProxyParentName: kube.TruncateLabelValue(opts.proxyLabels[LabelParentName]),
|
|
}
|
|
// Include namespace label for proxies created for a namespaced type.
|
|
if isNamespacedProxyType(opts.proxyType) {
|
|
lbls[labelPromProxyParentNamespace] = kube.TruncateLabelValue(opts.proxyLabels[LabelParentNamespace])
|
|
}
|
|
lbls[labelPromJob] = kube.TruncateLabelValue(promJobName(opts))
|
|
return lbls
|
|
}
|
|
|
|
// promJobName constructs the value of the Prometheus job label that will apply to all metrics for a ServiceMonitor.
|
|
func promJobName(opts *metricsOpts) string {
|
|
// Include parent resource namespace for proxies created for namespaced types.
|
|
if opts.proxyType == proxyTypeIngressResource || opts.proxyType == proxyTypeIngressService {
|
|
return fmt.Sprintf("ts_%s_%s_%s", opts.proxyType, opts.proxyLabels[LabelParentNamespace], opts.proxyLabels[LabelParentName])
|
|
}
|
|
return fmt.Sprintf("ts_%s_%s", opts.proxyType, opts.proxyLabels[LabelParentName])
|
|
}
|
|
|
|
// metricsSvcSelector returns the minimum label set to uniquely identify a metrics Service for a proxy.
|
|
func metricsSvcSelector(proxyLabels map[string]string, proxyType string) map[string]string {
|
|
sel := map[string]string{
|
|
labelPromProxyType: proxyType,
|
|
labelPromProxyParentName: kube.TruncateLabelValue(proxyLabels[LabelParentName]),
|
|
}
|
|
// Include namespace label for proxies created for a namespaced type.
|
|
if isNamespacedProxyType(proxyType) {
|
|
sel[labelPromProxyParentNamespace] = kube.TruncateLabelValue(proxyLabels[LabelParentNamespace])
|
|
}
|
|
return sel
|
|
}
|
|
|
|
// serviceMonitorTemplate returns a base ServiceMonitor type that, when converted to Unstructured, is a valid type that
|
|
// can be used in kube API server calls via the c/r client.
|
|
func serviceMonitorTemplate(name, ns string) *ServiceMonitor {
|
|
return &ServiceMonitor{
|
|
TypeMeta: metav1.TypeMeta{
|
|
Kind: "ServiceMonitor",
|
|
APIVersion: "monitoring.coreos.com/v1",
|
|
},
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: name,
|
|
Namespace: ns,
|
|
},
|
|
}
|
|
}
|
|
|
|
type metricsOpts struct {
|
|
proxyStsName string // name of StatefulSet for proxy
|
|
tsNamespace string // namespace in which Tailscale is installed
|
|
proxyLabels map[string]string // labels of the proxy StatefulSet
|
|
proxyType string
|
|
}
|
|
|
|
func isNamespacedProxyType(typ string) bool {
|
|
return typ == proxyTypeIngressResource || typ == proxyTypeIngressService
|
|
}
|
|
|
|
func mergeMapKeys(a, b map[string]string) map[string]string {
|
|
m := make(map[string]string, len(a)+len(b))
|
|
maps.Copy(m, b)
|
|
maps.Copy(m, a)
|
|
return m
|
|
}
|