diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index 5d14826d8..2e003e120 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -64,6 +64,12 @@ // cluster using the same hostname (in this case, the MagicDNS name of the ingress proxy) // as a non-cluster workload on tailnet. // This is only meant to be configured by the Kubernetes operator. +// - EXPERIMENTAL_AUTH_KEYS_ENDPOINT: if set and if running in Kubernetes, auth +// key will be retrieved by POST request to the endpoint passing service +// account token as an auth token. This is used by the Tailscale Kubernetes +// operator who also runs the endpoint. +// Tailscale IP range to DNAT to. +// - EXPERIMENTAL_TS_VIP // i.e 1.2.3.4 // // When running on Kubernetes, containerboot defaults to storing state in the // "tailscale" kube secret. To store state on local disk instead, set @@ -80,8 +86,10 @@ import ( "encoding/json" "errors" "fmt" + "io" "io/fs" "log" + "net/http" "net/netip" "os" "os/exec" @@ -140,6 +148,10 @@ func main() { TailscaledConfigFilePath: defaultEnv("EXPERIMENTAL_TS_CONFIGFILE_PATH", ""), AllowProxyingClusterTrafficViaIngress: defaultBool("EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS", false), PodIP: defaultEnv("POD_IP", ""), + PodName: defaultEnv("POD_NAME", ""), + KeysEndpoint: defaultEnv("EXPERIMENTAL_AUTH_KEYS_ENDPOINT", ""), + KubeStateSecret: defaultEnv("EXPERIMENTAL_KUBE_STATE_SECRET", ""), + TSVIP: defaultEnv("EXPERIMENTAL_TS_VIP", ""), } if err := cfg.validate(); err != nil { @@ -180,7 +192,10 @@ func main() { } cfg.KubernetesCanPatch = canPatch - if cfg.AuthKey == "" && !isOneStepConfig(cfg) { + // TODO: check that can do token request maybe? + + // TODO: did I break something here? + if authKeySourceIsKubeSecret(cfg) { key, err := findKeyInKubeSecret(bootCtx, cfg.KubeSecret) if err != nil { log.Fatalf("Getting authkey from kube secret: %v", err) @@ -252,6 +267,18 @@ func main() { } didLogin = true w.Close() + if cfg.KeysEndpoint != "" { + log.Printf("Creating Tailscale authkey by calling %s", cfg.KeysEndpoint) + key, err := getAuthKey(context.Background(), cfg) + if err != nil { + log.Fatalf("error getting Tailscale auth key: %v", err) + } + // TODO: this will not work with declarative config file + // that wants the auth key in there- figure out how to + // fix + // (So for now this does not work with Connector proxies) + cfg.AuthKey = string(key) + } if err := tailscaleUp(bootCtx, cfg); err != nil { return fmt.Errorf("failed to auth tailscale: %v", err) } @@ -262,6 +289,8 @@ func main() { return nil } + // Never with the Tailscale Kubernetes operator as it always sets + // cfg.AuthOnce=true if isTwoStepConfigAlwaysAuth(cfg) { if err := authTailscale(); err != nil { log.Fatalf("failed to auth tailscale: %v", err) @@ -279,13 +308,17 @@ authLoop: switch *n.State { case ipn.NeedsLogin: if isOneStepConfig(cfg) { - // This could happen if this is the - // first time tailscaled was run for - // this device and the auth key was not - // passed via the configfile. + // if state secret is set, delete it to + // ensure that we start from a clean + // slate on next restart. This could + // happen if this is the first time + // tailscaled was run for this device + // and the auth key was not passed via + // the configfile. log.Fatalf("invalid state: tailscaled daemon started with a config file, but tailscale is not logged in: ensure you pass a valid auth key in the config file.") } if err := authTailscale(); err != nil { + // delete state secret if set log.Fatalf("failed to auth tailscale: %v", err) } case ipn.NeedsMachineAuth: @@ -376,6 +409,18 @@ authLoop: } }() var wg sync.WaitGroup + // We only need to do this once. Backend target change or VIP change + // comes in via env var change which would trigger restart. + if cfg.ProxyTo != "" && cfg.TSVIP != "" { + netIP, err := netip.ParsePrefix(cfg.TSVIP) + if err != nil { + log.Fatalf("error parsing VIP %s: %v", cfg.TSVIP, err) + } + log.Printf("Installing proxy rules for a virtual tailnet IP: %s", cfg.TSVIP) + if err := installIngressForwardingRule(ctx, cfg.ProxyTo, []netip.Prefix{netIP}, nfr); err != nil { + log.Fatalf("installing ingress proxy rules: %v", err) + } + } runLoop: for { @@ -441,7 +486,7 @@ runLoop: } currentEgressIPs = newCurentEgressIPs } - if cfg.ProxyTo != "" && len(addrs) > 0 && ipsHaveChanged { + if cfg.ProxyTo != "" && cfg.TSVIP == "" && len(addrs) > 0 && ipsHaveChanged { log.Printf("Installing proxy rules") if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs, nfr); err != nil { log.Fatalf("installing ingress proxy rules: %v", err) @@ -639,10 +684,18 @@ func startTailscaled(ctx context.Context, cfg *settings) (*tailscale.LocalClient func tailscaledArgs(cfg *settings) []string { args := []string{"--socket=" + cfg.Socket} switch { - case cfg.InKubernetes && cfg.KubeSecret != "": - args = append(args, "--state=kube:"+cfg.KubeSecret) - if cfg.StateDir == "" { - cfg.StateDir = "/tmp" + case cfg.InKubernetes: + if cfg.KeysEndpoint != "" { + stateSecretName := fmt.Sprintf("ts-state-%s", cfg.PodName) + args = append(args, "--state=kube:"+stateSecretName) + if cfg.StateDir == "" { + cfg.StateDir = "/tmp" + } + } else if cfg.KubeSecret != "" { + args = append(args, "--state=kube:"+cfg.KubeSecret) + if cfg.StateDir == "" { + cfg.StateDir = "/tmp" + } } fallthrough case cfg.StateDir != "": @@ -895,6 +948,9 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne if err != nil { return err } + // local can be either the Tailnet IP address of this Tailscale device + // or it can be a tailnet virtual IP that this tailnet node is a backend + // for. var local netip.Addr for _, pfx := range tsIPs { if !pfx.IsSingleIP() { @@ -943,6 +999,7 @@ type settings struct { StateDir string AcceptDNS *bool KubeSecret string + KubeStateSecret string SOCKSProxyAddr string HTTPProxyAddr string Socket string @@ -957,7 +1014,10 @@ type settings struct { // PodIP is the IP of the Pod if running in Kubernetes. This is used // when setting up rules to proxy cluster traffic to cluster ingress // target. - PodIP string + PodIP string + PodName string + KeysEndpoint string + TSVIP string } func (s *settings) validate() error { @@ -990,6 +1050,9 @@ func (s *settings) validate() error { if s.AllowProxyingClusterTrafficViaIngress && s.PodIP == "" { return errors.New("EXPERIMENTAL_ALLOW_PROXYING_CLUSTER_TRAFFIC_VIA_INGRESS is set but POD_IP is not set") } + if s.KeysEndpoint != "" && !s.InKubernetes { + return errors.New("EXPERIMENTAL_AUTH_KEYS_ENDPOINT is set, but the containerboot does not appear to be running on kube") + } return nil } @@ -1089,3 +1152,37 @@ func isTwoStepConfigAlwaysAuth(cfg *settings) bool { func isOneStepConfig(cfg *settings) bool { return cfg.TailscaledConfigFilePath != "" } + +func authKeySourceIsKubeSecret(cfg *settings) bool { + return cfg.InKubernetes && cfg.AuthKey == "" && cfg.KeysEndpoint == "" +} + +func getAuthKey(ctx context.Context, cfg *settings) ([]byte, error) { + client := http.Client{} + // TODO: somewhere check that this has permissions to create a token + token, err := kc.CreateTokenForPod(ctx, cfg.PodName, []string{"ts-keyserver"}) + if err != nil { + return nil, fmt.Errorf("error generating token: %w", err) + } + req, err := http.NewRequestWithContext(ctx, "POST", cfg.KeysEndpoint, nil) + if err != nil { + return nil, fmt.Errorf("error creating new HTTP request: %v", err) + } + req.Header.Add("Authorization", "Bearer "+token) + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("error requesting auth key from URL %s: %w", cfg.KeysEndpoint, err) + } + defer resp.Body.Close() + respBs, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("error reading response body: %w", err) + } + if resp.StatusCode != 200 { // 200 is the only success code returned by the keyserver + return nil, fmt.Errorf("auth key response %s with unexpected status code %d", string(respBs), resp.StatusCode) + } + if len(respBs) == 0 { + return nil, errors.New("unexpected empty response") + } + return respBs, nil +} diff --git a/cmd/k8s-operator/deploy/chart/templates/deployment.yaml b/cmd/k8s-operator/deploy/chart/templates/deployment.yaml index ba1de9779..f137d00ef 100644 --- a/cmd/k8s-operator/deploy/chart/templates/deployment.yaml +++ b/cmd/k8s-operator/deploy/chart/templates/deployment.yaml @@ -78,6 +78,10 @@ spec: - name: oauth mountPath: /oauth readOnly: true + ports: + - name: keyserver + containerPort: 8443 + protocol: TCP {{- with .Values.operatorConfig.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/cmd/k8s-operator/deploy/chart/templates/keyserver-service.yaml b/cmd/k8s-operator/deploy/chart/templates/keyserver-service.yaml new file mode 100644 index 000000000..39d26a60b --- /dev/null +++ b/cmd/k8s-operator/deploy/chart/templates/keyserver-service.yaml @@ -0,0 +1,16 @@ +# Copyright (c) Tailscale Inc & AUTHORS +# SPDX-License-Identifier: BSD-3-Clause + +apiVersion: v1 +kind: Service +metadata: + name: keyserver + namespace: {{ .Release.Namespace }} +spec: + ports: + - port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app: operator + type: ClusterIP diff --git a/cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml b/cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml index ff518e40c..112c73e9c 100644 --- a/cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml +++ b/cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml @@ -64,3 +64,50 @@ roleRef: kind: Role name: operator apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: keyserver + namespace: {{ .Release.Namespace }} +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get","list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: keyserver + namespace: {{ .Release.Namespace }} +subjects: +- kind: ServiceAccount + name: operator + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: keyserver + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: tailscale-keyserver +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: tailscale-keyserver +subjects: +- kind: ServiceAccount + name: operator + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: tailscale-keyserver +rules: +- apiGroups: ["authentication.k8s.io"] + resources: + - tokenreviews + verbs: ["create"] diff --git a/cmd/k8s-operator/deploy/chart/templates/proxy-rbac.yaml b/cmd/k8s-operator/deploy/chart/templates/proxy-rbac.yaml index 31a034aaa..c367276f6 100644 --- a/cmd/k8s-operator/deploy/chart/templates/proxy-rbac.yaml +++ b/cmd/k8s-operator/deploy/chart/templates/proxy-rbac.yaml @@ -30,3 +30,32 @@ roleRef: kind: Role name: proxies apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: proxies-token +rules: +- apiGroups: [""] + resources: + - "serviceaccounts/token" + - "serviceaccounts" # needed? + verbs: + - "create" + - "get" + resourceNames: + - "proxies" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: proxies-token + namespace: {{ .Release.Namespace }} +subjects: +- kind: ServiceAccount + name: proxies + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: proxies-token + apiGroup: rbac.authorization.k8s.io diff --git a/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml b/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml index 83504f4c0..9d39c25db 100644 --- a/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml +++ b/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml @@ -452,6 +452,9 @@ spec: value: description: Value is the taint value the toleration matches to. If the operator is Exists, the value should be empty, otherwise just a regular string. type: string + replicas: + type: integer + format: int32 status: type: object properties: diff --git a/cmd/k8s-operator/deploy/examples/proxyclass.yaml b/cmd/k8s-operator/deploy/examples/proxyclass.yaml index 121465bab..1e85189a3 100644 --- a/cmd/k8s-operator/deploy/examples/proxyclass.yaml +++ b/cmd/k8s-operator/deploy/examples/proxyclass.yaml @@ -4,6 +4,7 @@ metadata: name: prod spec: statefulSet: + replicas: 2 annotations: platform-component: infra pod: diff --git a/cmd/k8s-operator/deploy/manifests/operator.yaml b/cmd/k8s-operator/deploy/manifests/operator.yaml index 62e444f82..011a6c2d4 100644 --- a/cmd/k8s-operator/deploy/manifests/operator.yaml +++ b/cmd/k8s-operator/deploy/manifests/operator.yaml @@ -603,6 +603,9 @@ spec: type: object type: array type: object + replicas: + format: int32 + type: integer type: object required: - statefulSet @@ -693,6 +696,34 @@ rules: - update --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: tailscale-keyserver +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: proxies-token +rules: + - apiGroups: + - "" + resourceNames: + - proxies + resources: + - serviceaccounts/token + - serviceaccounts + verbs: + - create + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: tailscale-operator @@ -706,6 +737,19 @@ subjects: namespace: tailscale --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: tailscale-keyserver +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: tailscale-keyserver +subjects: + - kind: ServiceAccount + name: operator + namespace: tailscale +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: operator @@ -726,6 +770,21 @@ rules: --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role +metadata: + name: keyserver + namespace: tailscale +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role metadata: name: proxies namespace: tailscale @@ -753,6 +812,20 @@ subjects: --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding +metadata: + name: keyserver + namespace: tailscale +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: keyserver +subjects: + - kind: ServiceAccount + name: operator + namespace: tailscale +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding metadata: name: proxies namespace: tailscale @@ -765,6 +838,34 @@ subjects: name: proxies namespace: tailscale --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: proxies-token + namespace: tailscale +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: proxies-token +subjects: + - kind: ServiceAccount + name: proxies + namespace: tailscale +--- +apiVersion: v1 +kind: Service +metadata: + name: keyserver + namespace: tailscale +spec: + ports: + - port: 8443 + protocol: TCP + targetPort: 8443 + selector: + app: operator + type: ClusterIP +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -811,6 +912,10 @@ spec: image: tailscale/k8s-operator:unstable imagePullPolicy: Always name: operator + ports: + - containerPort: 8443 + name: keyserver + protocol: TCP volumeMounts: - mountPath: /oauth name: oauth diff --git a/cmd/k8s-operator/deploy/manifests/proxy.yaml b/cmd/k8s-operator/deploy/manifests/proxy.yaml index daa795b3e..eb108e01d 100644 --- a/cmd/k8s-operator/deploy/manifests/proxy.yaml +++ b/cmd/k8s-operator/deploy/manifests/proxy.yaml @@ -34,6 +34,10 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name securityContext: capabilities: add: diff --git a/cmd/k8s-operator/deploy/manifests/userspace-proxy.yaml b/cmd/k8s-operator/deploy/manifests/userspace-proxy.yaml index fe9fd443e..26e6b0783 100644 --- a/cmd/k8s-operator/deploy/manifests/userspace-proxy.yaml +++ b/cmd/k8s-operator/deploy/manifests/userspace-proxy.yaml @@ -22,3 +22,7 @@ spec: value: "true" - name: TS_AUTH_ONCE value: "true" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name diff --git a/cmd/k8s-operator/keyserver.go b/cmd/k8s-operator/keyserver.go new file mode 100644 index 000000000..4104c0410 --- /dev/null +++ b/cmd/k8s-operator/keyserver.go @@ -0,0 +1,244 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "context" + "fmt" + "net/http" + "strings" + + "go.uber.org/zap" + authv1 "k8s.io/api/authentication/v1" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apiserver/pkg/authentication/serviceaccount" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "tailscale.com/client/tailscale" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" +) + +type keyServer struct { + restConfig *rest.Config + client.Client + logger *zap.SugaredLogger + tsNamespace string + defaultDeviceTags []string + tsClient tsClient +} + +func (ks *keyServer) runKeyServer() error { + proxyServiceAccountName := fmt.Sprintf("system:serviceaccount:%s:proxies", ks.tsNamespace) + // create a client-go client as c/r client cannot be used to directly + // access Auth interface to create TokenReviews. TokenReviews are not + // objects that exist in cluster, so the normal c/r flow of 'CREATE and + // object, if needed to observe its current state GET it does not work + // here- we need to read the status from the TokenReview status as + // returned in response, so we need to use the actual auth client. + // TODO: maybe I actually don't need to do this because the object + // passed to c/r Create would get updated? + kubeClient, err := kubernetes.NewForConfig(ks.restConfig) + if err != nil { + return fmt.Errorf("error creating a new kube client: %v", err) + } + mux := http.NewServeMux() + mux.HandleFunc("/keys", func(w http.ResponseWriter, r *http.Request) { + ks.logger.Debugf("received request for an auth key") + // Get the auth token - like https://github.com/kubernetes/apiserver/blob/release-1.29/pkg/authentication/request/bearertoken/bearertoken.go#L42-L63 + auth := strings.TrimSpace(r.Header.Get("Authorization")) + if auth == "" { + ks.logger.Info("received a request with no auth header") + http.Error(w, "permission denied", http.StatusForbidden) + return + } + parts := strings.SplitN(auth, " ", 3) + if len(parts) < 2 || strings.ToLower(parts[0]) != "bearer" { + ks.logger.Info("received a request with no bearer token") + http.Error(w, "permission denied", http.StatusForbidden) + return + } + + token := parts[1] + + // Empty bearer tokens aren't valid + if len(token) == 0 { + ks.logger.Info("received a request with an empty bearer token") + http.Error(w, "permission denied", http.StatusForbidden) + return + } + // create a TokenReview + tr := &authv1.TokenReview{ + Spec: authv1.TokenReviewSpec{Token: token, Audiences: []string{"ts-keyserver"}}, + } + + // TODO: alt would be to delegate via auth webhook - that's how + // RBAC proxy does it. Compare. + resp, err := kubeClient.AuthenticationV1().TokenReviews().Create(r.Context(), tr, metav1.CreateOptions{}) + if err != nil { + ks.logger.Errorf("error creating a TokenReview: %v", err) + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + if !resp.Status.Authenticated { + ks.logger.Info("token was not authenticated") + http.Error(w, "permission denied", http.StatusForbidden) + return + } + // TODO: set and validate audience + // We know that only ServiceAccount 'proxies' in operator + // namespace should be allowed to call 'keys' endpoint. + // Alternatively we could assign 'proxies' an RBAC allowing it + // to call '/keys' endpoint (RBAC for non-resource URLs). At the + // moment I don't see a value in doing that as we know what + // ServiceAccount is allowed to perform the action and an + // operator installation always includes this ServiceAccount. + if username := resp.Status.User.Username; username != proxyServiceAccountName { + ks.logger.Info("received a request for token for user %s, expected %s", username, proxyServiceAccountName) + http.Error(w, "permission denied", http.StatusForbidden) + return + } + // TODO: ensure that this will always have extras when the token is sent from containerboot + if resp.Status.User.Extra == nil { + ks.logger.Info("received a request for a token that does not contain extra information, please report this") + http.Error(w, "unable to identify caller Pod", http.StatusForbidden) + return + } + if len(resp.Status.User.Extra[serviceaccount.PodNameKey]) != 1 || resp.Status.User.Extra[serviceaccount.PodNameKey][0] == "" { + ks.logger.Infof("impossible to identify caller Pod from token review response: %#+v", resp.Status.User.Extra[serviceaccount.PodNameKey]) + http.Error(w, "unable to identify caller Pod", http.StatusForbidden) + return + } + podName := types.NamespacedName{Namespace: ks.tsNamespace, Name: resp.Status.User.Extra[serviceaccount.PodNameKey][0]} + ks.logger.Debugf("request for key authenticated as from Pod %s", podName) + + // TODO: cache metadata only for these, filter ts namespace and labels + pod := &corev1.Pod{} + // TODO: is it right to use this context? + if err := ks.Client.Get(r.Context(), podName, pod); err != nil { + ks.logger.Errorf("unable to retrieve caller Pod from cache: %v", err) + http.Error(w, "unable to identify caller Pod", http.StatusForbidden) + return + } + // Get the parent resource and figure out what tags are needed. + // Alternatives could be 1) annotate Pods with the desired ACL + // tags 2) pass each StatefulSet a specific URL that includes + // the tags (i.e base64 encoded). But 2) would probably require + // RBAC for calling _that_ URL (and we currently use the same + // ServiceAccount for all proxies). 1) could be ok (and would + // also solve the problem where user updating ACL tags is not + // picked up by proxies), but should discuss the model + // (including what should happen when ACL tags are updated). + // Generally of course should speed this up much as possible. + tags, err := ks.tagsForPod(r.Context(), pod) + if err != nil { + ks.logger.Errorf("error determining ACL tags to apply to the auth key: %v", err) + http.Error(w, "error determining ACL tags", http.StatusInternalServerError) + return + } + // create the device + // TODO: bump a metric here. probably should also be user facing? + key, err := ks.newAuthKey(r.Context(), tags) + if err != nil { + ks.logger.Errorf("error determining ACL tags to apply to the auth key") + http.Error(w, "error creating a new auth key", http.StatusInternalServerError) + return + } + w.WriteHeader(200) + // probably? + w.Header().Set("Content-Type", "text/plain;charset=UTF-8") + w.Write([]byte(key)) + }) + srv := http.Server{ + Handler: mux, + Addr: ":8443", // 443 is auth proxy if that's too running on this operator instance + } + ks.logger.Infof("running key server on %v", srv.Addr) + return srv.ListenAndServe() +} + +func (ks *keyServer) newAuthKey(ctx context.Context, tags []string) (string, error) { + caps := tailscale.KeyCapabilities{ + Devices: tailscale.KeyDeviceCapabilities{ + Create: tailscale.KeyDeviceCreateCapabilities{ + Reusable: false, + Preauthorized: true, + Tags: tags, + }, + }, + } + key, _, err := ks.tsClient.CreateKey(ctx, caps) + if err != nil { + return "", err + } + return key, nil +} + +func (ks *keyServer) tagsForPod(ctx context.Context, pod *corev1.Pod) ([]string, error) { + parentLabels, err := managedLabelsFromPod(pod) + if err != nil { + return nil, fmt.Errorf("error determining parent resource labels: %v", err) + } + tags, err := ks.aclTagsForResource(ctx, parentLabels) + if err != nil { + return nil, fmt.Errorf("error determining ACl tags: %v", err) + } + return tags, nil +} + +func (ks *keyServer) aclTagsForResource(ctx context.Context, labels map[string]string) ([]string, error) { + switch labels[LabelParentType] { + case "svc": + svcName := types.NamespacedName{Namespace: labels[LabelParentNamespace], Name: labels[LabelParentName]} + svc := &corev1.Service{} + if err := ks.Get(ctx, svcName, svc); err != nil { + return nil, fmt.Errorf("error getting Service: %v", err) + } + return ks.aclsForObjectAnnotations(svc.Annotations), nil + case "ingress": + ingName := types.NamespacedName{Namespace: labels[LabelParentNamespace], Name: labels[LabelParentName]} + ing := &networkingv1.Ingress{} + if err := ks.Get(ctx, ingName, ing); err != nil { + return nil, fmt.Errorf("error getting Ingress: %v", err) + } + return ks.aclsForObjectAnnotations(ing.Annotations), nil + case "connector": + connectorName := types.NamespacedName{Name: labels[LabelParentName]} + conn := &tsapi.Connector{} + if err := ks.Get(ctx, connectorName, conn); err != nil { + return nil, fmt.Errorf("error getting Connector: %v", err) + } + if len(conn.Spec.Tags) > 0 { + return conn.Spec.Tags.Stringify(), nil + } + return ks.defaultDeviceTags, nil + default: + return nil, fmt.Errorf("unkown parent type: %s", labels[LabelParentType]) + } +} + +func (ks *keyServer) aclsForObjectAnnotations(annots map[string]string) []string { + if annots == nil || annots[AnnotationTags] == "" { + return ks.defaultDeviceTags + } + return strings.Split(annots[AnnotationTags], ",") +} + +func managedLabelsFromPod(pod *corev1.Pod) (map[string]string, error) { + labels := make(map[string]string) + for _, labelName := range []string{LabelManaged, LabelParentName, LabelParentNamespace, LabelParentType} { + if labelVal := pod.GetLabels()[labelName]; labelVal == "" { + return nil, fmt.Errorf("Pod does not have label: %s", labelName) + } else { + labels[labelName] = labelVal + } + } + return labels, nil +} diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 236c83ad8..90dc35f2e 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -222,6 +222,8 @@ func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string ByObject: map[client.Object]cache.ByObject{ &corev1.Secret{}: nsFilter, &appsv1.StatefulSet{}: nsFilter, + // TODO (irberkrm): cahce metadata only for Pods + &corev1.Pod{}: nsFilter, }, }, Scheme: tsapi.GlobalScheme, @@ -316,6 +318,16 @@ func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string if err != nil { startlog.Fatal("could not create proxyclass reconciler: %v", err) } + // TODO: maybe put in a better place, but this needs rest config and c/r client + ks := &keyServer{ + Client: mgr.GetClient(), + restConfig: mgr.GetConfig(), + logger: zlog.Named("keyserver"), + tsNamespace: tsNamespace, + defaultDeviceTags: strings.Split(tags, ","), // or do differently + tsClient: tsClient, + } + go ks.runKeyServer() startlog.Infof("Startup complete, operator running, version: %s", version.Long()) if err := mgr.Start(signals.SetupSignalHandler()); err != nil { startlog.Fatalf("could not start manager: %v", err) diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index 87f114b63..d3538a565 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -67,6 +67,9 @@ const ( // Annotations settable by users on ingresses. AnnotationFunnel = "tailscale.com/funnel" + // Tailnet VIP that this proxy should satisfy + AnnotationTSVIP = "tailscale.com/expose-via-vip" + // If set to true, set up iptables/nftables rules in the proxy forward // cluster traffic to the tailnet IP of that proxy. This can only be set // on an Ingress. This is useful in cases where a cluster target needs @@ -127,6 +130,7 @@ type tailscaleSTSConfig struct { Connector *connector ProxyClass string + TSVIP string // a tailnet VIP that should route to this proxy } type connector struct { @@ -308,9 +312,7 @@ func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger * return "", "", err } - var ( - authKey, hash string - ) + var hash string if orig == nil { // Secret doesn't exist yet, create one. Initially it contains // only the Tailscale authkey, but once Tailscale starts it'll @@ -327,21 +329,23 @@ func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger * } // Create API Key secret which is going to be used by the statefulset // to authenticate with Tailscale. - logger.Debugf("creating authkey for new tailscale proxy") - tags := stsC.Tags - if len(tags) == 0 { - tags = a.defaultTags - } - authKey, err = a.newAuthKey(ctx, tags) - if err != nil { - return "", "", err - } - } - if !shouldDoTailscaledDeclarativeConfig(stsC) && authKey != "" { - mak.Set(&secret.StringData, "authkey", authKey) + // logger.Debugf("creating authkey for new tailscale proxy") + // tags := stsC.Tags + // if len(tags) == 0 { + // tags = a.defaultTags + // } + // authKey, err = a.newAuthKey(ctx, tags) + // if err != nil { + // return "", "", err + // } } + // if !shouldDoTailscaledDeclarativeConfig(stsC) && authKey != "" { + // mak.Set(&secret.StringData, "authkey", authKey) + // } + + // TODO: this is going to be broken now, fix if shouldDoTailscaledDeclarativeConfig(stsC) { - confFileBytes, h, err := tailscaledConfig(stsC, authKey, orig) + confFileBytes, h, err := tailscaledConfig(stsC, "", orig) if err != nil { return "", "", fmt.Errorf("error creating tailscaled config: %w", err) } @@ -484,6 +488,17 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S Value: "true", }) } + if sts.TSVIP != "" { + container.Env = append(container.Env, corev1.EnvVar{ + Name: "EXPERIMENTAL_TS_VIP", + Value: sts.TSVIP + "/32", + }) + container.Env = append(container.Env, corev1.EnvVar{ + Name: "TS_ROUTES", + Value: sts.TSVIP + "/32", + }) + + } if !shouldDoTailscaledDeclarativeConfig(sts) { container.Env = append(container.Env, corev1.EnvVar{ Name: "TS_HOSTNAME", @@ -529,6 +544,12 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S } pod.Spec.PriorityClassName = a.proxyPriorityClassName + keyURL := fmt.Sprintf("http://keyserver.%s.svc.cluster.local:8443/keys", a.operatorNamespace) + container.Env = append(container.Env, corev1.EnvVar{ + Name: "EXPERIMENTAL_AUTH_KEYS_ENDPOINT", + Value: keyURL, + }) + // Ingress/egress proxy configuration options. if sts.ClusterTargetIP != "" { container.Env = append(container.Env, corev1.EnvVar{ @@ -618,6 +639,10 @@ func applyProxyClassToStatefulSet(pc *tsapi.ProxyClass, ss *appsv1.StatefulSet) ss.ObjectMeta.Annotations = mergeStatefulSetLabelsOrAnnots(ss.ObjectMeta.Annotations, wantsSSAnnots, tailscaleManagedAnnotations) } + if pc.Spec.StatefulSet.Replicas != nil { + ss.Spec.Replicas = pc.Spec.StatefulSet.Replicas + } + // Update Pod fields. if pc.Spec.StatefulSet.Pod == nil { return ss diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go index 8820a3554..8c1f24e8c 100644 --- a/cmd/k8s-operator/svc.go +++ b/cmd/k8s-operator/svc.go @@ -197,6 +197,7 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga Tags: tags, ChildResourceLabels: crl, ProxyClass: proxyClass, + TSVIP: a.tailnetVIPForService(svc), } a.mu.Lock() @@ -337,6 +338,13 @@ func proxyClassForObject(o client.Object) string { return o.GetLabels()[LabelProxyClass] } +func (a *ServiceReconciler) tailnetVIPForService(svc *corev1.Service) string { + if !a.shouldExpose(svc) || svc.Annotations == nil { + return "" + } + return svc.GetAnnotations()[AnnotationTSVIP] +} + func proxyClassIsReady(ctx context.Context, name string, cl client.Client) (bool, error) { proxyClass := new(tsapi.ProxyClass) if err := cl.Get(ctx, types.NamespacedName{Name: name}, proxyClass); err != nil { diff --git a/k8s-operator/apis/v1alpha1/types_proxyclass.go b/k8s-operator/apis/v1alpha1/types_proxyclass.go index 98d7b5753..0b7e1dc1a 100644 --- a/k8s-operator/apis/v1alpha1/types_proxyclass.go +++ b/k8s-operator/apis/v1alpha1/types_proxyclass.go @@ -62,6 +62,11 @@ type StatefulSet struct { // Configuration for the proxy Pod. // +optional Pod *Pod `json:"pod,omitempty"` + // In future: allow users to tell the operator that spec.replicas for a + // statefulset should be unset to allow HPA manage this field (i.e if + // users set it to 0 here, unset the replicas field). + // +optional + Replicas *int32 `json:"replicas,omitempty"` } type Pod struct { diff --git a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go index efd202eee..82023df5a 100644 --- a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go +++ b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go @@ -363,6 +363,11 @@ func (in *StatefulSet) DeepCopyInto(out *StatefulSet) { *out = new(Pod) (*in).DeepCopyInto(*out) } + if in.Replicas != nil { + in, out := &in.Replicas, &out.Replicas + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StatefulSet. diff --git a/kube/api.go b/kube/api.go index b49b76c34..f6788f31e 100644 --- a/kube/api.go +++ b/kube/api.go @@ -3,7 +3,9 @@ package kube -import "time" +import ( + "time" +) // Note: The API types are copied from k8s.io/api{,machinery} to not introduce a // module dependency on the Kubernetes API as it pulls in many more dependencies. @@ -147,6 +149,31 @@ type Secret struct { Data map[string][]byte `json:"data,omitempty"` } +type TokenRequest struct { + TypeMeta `json:",inline"` + ObjectMeta `json:"metadata"` + Spec TokenRequestSpec `json:"spec"` + Status TokenRequestStatus `json:"status,omitempty"` +} + +type TokenRequestSpec struct { + Audiences []string `json:"audiences"` + ExpirationSeconds *int64 `json:"expirationSeconds"` + BoundObjectRef *BoundObjectReference `json:"boundObjectRef"` +} + +// BoundObjectReference is a reference to an object that a token is bound to. +type BoundObjectReference struct { + Kind string `json:"kind,omitempty"` + APIVersion string `json:"apiVersion,omitempty"` + Name string `json:"name,omitempty"` +} + +type TokenRequestStatus struct { + Token string `json:"token"` + ExpirationSeconds time.Time `json:"expirationSeconds"` +} + // Status is a return value for calls that don't return other objects. type Status struct { TypeMeta `json:",inline"` diff --git a/kube/client.go b/kube/client.go index f4befd1c8..28fd639fe 100644 --- a/kube/client.go +++ b/kube/client.go @@ -127,6 +127,10 @@ func (c *Client) secretURL(name string) string { return fmt.Sprintf("%s/api/v1/namespaces/%s/secrets/%s", c.url, c.ns, name) } +func (c *Client) tokenRequestURL() string { + return fmt.Sprintf("%s/api/v1/namespaces/%s/serviceaccounts/proxies/token", c.url, c.ns) +} + func getError(resp *http.Response) error { if resp.StatusCode == 200 || resp.StatusCode == 201 { // These are the only success codes returned by the Kubernetes API. @@ -154,7 +158,11 @@ func setHeader(key, value string) func(*http.Request) { // If the request fails with a 401, the token is expired and a new one is // requested. func (c *Client) doRequest(ctx context.Context, method, url string, in, out any, opts ...func(*http.Request)) error { - req, err := c.newRequest(ctx, method, url, in) + tk, err := c.getOrRenewToken() + if err != nil { + return err + } + req, err := c.newRequest(ctx, method, url, in, tk) if err != nil { return err } @@ -178,11 +186,7 @@ func (c *Client) doRequest(ctx context.Context, method, url string, in, out any, return nil } -func (c *Client) newRequest(ctx context.Context, method, url string, in any) (*http.Request, error) { - tk, err := c.getOrRenewToken() - if err != nil { - return nil, err - } +func (c *Client) newRequest(ctx context.Context, method, url string, in any, token string) (*http.Request, error) { var body io.Reader if in != nil { switch in := in.(type) { @@ -204,7 +208,7 @@ func (c *Client) newRequest(ctx context.Context, method, url string, in any) (*h req.Header.Add("Content-Type", "application/json") } req.Header.Add("Accept", "application/json") - req.Header.Add("Authorization", "Bearer "+tk) + req.Header.Add("Authorization", "Bearer "+token) return req, nil } @@ -217,6 +221,28 @@ func (c *Client) GetSecret(ctx context.Context, name string) (*Secret, error) { return s, nil } +func (c *Client) GetOrCreateSecret(ctx context.Context, name string) (*Secret, error) { + secret, err := c.GetSecret(ctx, name) + if err != nil { + if st, ok := err.(*Status); ok && st.Code == 404 { + secret = &Secret{ + TypeMeta: TypeMeta{ + APIVersion: "v1", + Kind: "Secret", + }, + ObjectMeta: ObjectMeta{ + Name: name, + }, + } + if err := c.CreateSecret(ctx, secret); err != nil { + return nil, fmt.Errorf("error creating Secret: %w", err) + } + } + } + return secret, nil + +} + // CreateSecret creates a secret in the Kubernetes API. func (c *Client) CreateSecret(ctx context.Context, s *Secret) error { s.Namespace = c.ns @@ -228,6 +254,28 @@ func (c *Client) UpdateSecret(ctx context.Context, s *Secret) error { return c.doRequest(ctx, "PUT", c.secretURL(s.Name), s, nil) } +func (c *Client) CreateTokenForPod(ctx context.Context, name string, audiences []string) (string, error) { + tReq := &TokenRequest{ + Spec: TokenRequestSpec{ + BoundObjectRef: &BoundObjectReference{ + Kind: "Pod", + Name: name, + }, + }, + } + if len(audiences) != 0 { + tReq.Spec.Audiences = audiences + } + err := c.doRequest(ctx, "POST", c.tokenRequestURL(), tReq, tReq) + if err != nil { + return "", fmt.Errorf("error creating a token: %w", err) + } + if tReq.Status.Token == "" { + return "", fmt.Errorf("Kubernetes did not give us a token, full request: %+#v\n", tReq) + } + return tReq.Status.Token, nil +} + // JSONPatch is a JSON patch operation. // It currently (2023-03-02) only supports the "remove" operation. //