diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index 8e5c8ad0f..10fa31381 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -15,7 +15,9 @@ // - TS_HOSTNAME: the hostname to request for the node. // - TS_ROUTES: subnet routes to advertise. To accept routes, use TS_EXTRA_ARGS to pass in --accept-routes. // - TS_DEST_IP: proxy all incoming Tailscale traffic to the given -// destination. +// destination defined by an IP address. +// - TS_DEST_DNS: proxy all incoming Tailscale traffic to the given +// destination defined by a DNS name. // - TS_TAILNET_TARGET_IP: proxy all incoming non-Tailscale traffic to the given // destination defined by an IP. // - TS_TAILNET_TARGET_FQDN: proxy all incoming non-Tailscale traffic to the given @@ -63,6 +65,7 @@ import ( "fmt" "io/fs" "log" + "net" "net/netip" "os" "os/exec" @@ -80,6 +83,8 @@ import ( "golang.org/x/sys/unix" "tailscale.com/client/tailscale" "tailscale.com/ipn" + "tailscale.com/net/dns/recursive" + "tailscale.com/net/dns/resolvconffile" "tailscale.com/tailcfg" "tailscale.com/types/logger" "tailscale.com/types/ptr" @@ -104,6 +109,7 @@ func main() { Routes: defaultEnv("TS_ROUTES", ""), ServeConfigPath: defaultEnv("TS_SERVE_CONFIG", ""), ProxyTo: defaultEnv("TS_DEST_IP", ""), + ProxyToDNS: defaultEnv("TS_DEST_DNS", ""), TailnetTargetIP: defaultEnv("TS_TAILNET_TARGET_IP", ""), TailnetTargetFQDN: defaultEnv("TS_TAILNET_TARGET_FQDN", ""), DaemonExtraArgs: defaultEnv("TS_TAILSCALED_EXTRA_ARGS", ""), @@ -322,7 +328,7 @@ authLoop: } var ( - wantProxy = cfg.ProxyTo != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" + wantProxy = cfg.ProxyTo != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.ProxyToDNS != "" wantDeviceInfo = cfg.InKubernetes && cfg.KubeSecret != "" && cfg.KubernetesCanPatch startupTasksDone = false currentIPs deephash.Sum // tailscale IPs assigned to device @@ -427,6 +433,12 @@ runLoop: log.Fatalf("installing ingress proxy rules: %v", 
err)
 					}
 				}
+				if cfg.ProxyToDNS != "" && len(addrs) > 0 && ipsHaveChanged {
+					log.Printf("Installing proxy rules")
+					if err := installIngressForwardingRuleExternalNameService(ctx, cfg.ProxyToDNS, addrs, nfr); err != nil {
+						log.Fatalf("installing ingress proxy rules for External Name Service: %v", err)
+					}
+				}
 				if cfg.ServeConfigPath != "" && len(n.NetMap.DNS.CertDomains) > 0 {
 					cd := n.NetMap.DNS.CertDomains[0]
 					prev := certDomain.Swap(ptr.To(cd))
@@ -846,6 +858,47 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne
 	return nil
 }
 
+// installIngressForwardingRuleExternalNameService resolves name via
+// resolveDNS and asks nfr to DNAT traffic destined for this node's first
+// single-address IPv4 prefix in tsIPs to the resolved addresses. It then
+// adds an MSS clamping rule on tailscale0 for each resolved address.
+// IPv6 prefixes in tsIPs are skipped for now.
+func installIngressForwardingRuleExternalNameService(ctx context.Context, name string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
+	dsts, err := resolveDNS(ctx, name)
+	if err != nil {
+		// Wrap with %w, like the other errors returned below, so that
+		// callers can still inspect the underlying resolver error.
+		return fmt.Errorf("resolving DNS name %q: %w", name, err)
+	}
+	if len(dsts) == 0 {
+		// DNATWithLoadBalancer needs at least one destination.
+		return fmt.Errorf("DNS name %q resolved to no addresses", name)
+	}
+	for _, dst := range dsts {
+		log.Printf("DNS name %s resolved to %s", name, dst.String())
+	}
+	var local netip.Addr
+	for _, pfx := range tsIPs {
+		// TODO (irbekrm): support IPv6
+		if pfx.IsSingleIP() && pfx.Addr().Is4() {
+			local = pfx.Addr()
+			break
+		}
+	}
+	if !local.IsValid() {
+		return fmt.Errorf("no tailscale IP matching family found in %v", tsIPs)
+	}
+	if err := nfr.DNATWithLoadBalancer(local, dsts); err != nil {
+		return fmt.Errorf("installing DNAT rules for ingress to %s: %w", name, err)
+	}
+	for _, dst := range dsts {
+		if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
+			return fmt.Errorf("adding rule to clamp traffic to %v: %w", dst, err)
+		}
+	}
+	return nil
+}
+
 // settings is all the configuration for containerboot.
 type settings struct {
 	AuthKey string
@@ -854,7 +907,8 @@ type settings struct {
 	// ProxyTo is the destination IP to which all incoming
 	// Tailscale traffic should be proxied. If empty, no proxying
 	// is done. This is typically a locally reachable IP.
-	ProxyTo string
+	ProxyTo    string
+	ProxyToDNS string
 	// TailnetTargetIP is the destination IP to which all incoming
 	// non-Tailscale traffic should be proxied. This is typically a
 	// Tailscale IP.
@@ -879,6 +923,32 @@ type settings struct {
 	KubernetesCanPatch bool
 }
 
+// resolveDNS resolves name to IP addresses using the nameservers listed in
+// the resolv.conf file at resolvconffile.Path. The resolver is created with
+// IPv6 disabled. It returns an error if resolv.conf cannot be parsed or
+// lists no nameservers, if the query fails, or if the query returns no
+// addresses.
+func resolveDNS(ctx context.Context, name string) ([]netip.Addr, error) {
+	// TODO: watch for resolv.conf changes
+	conf, err := resolvconffile.ParseFile(resolvconffile.Path)
+	if err != nil {
+		return nil, fmt.Errorf("error parsing resolv.conf: %w", err)
+	}
+	if len(conf.Nameservers) == 0 {
+		return nil, errors.New("resolv.conf contains no nameservers")
+	}
+	// TODO (irbekrm): support IPv6
+	res := recursive.NewResolverWithRoots(&net.Dialer{}, log.Printf, conf.Nameservers, true)
+	addrs, _, err := res.Resolve(ctx, name)
+	if err != nil {
+		return nil, fmt.Errorf("error resolving DNS name %s: %w", name, err)
+	}
+	if len(addrs) == 0 {
+		return nil, fmt.Errorf("no IPv4 addresses returned for DNS name %s from nameservers %v", name, conf.Nameservers)
+	}
+	return addrs, nil
+}
+
 // defaultEnv returns the value of the given envvar name, or defVal if
 // unset.
func defaultEnv(name, defVal string) string { diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index d762acd9a..17a0da793 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -332,7 +332,6 @@ func managedResourceHandlerForType(typ string) handler.MapFunc { {NamespacedName: parentFromObjectLabels(o)}, } } - } func serviceHandler(_ context.Context, o client.Object) []reconcile.Request { diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index 6e54d9741..630b00d5b 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -67,9 +67,13 @@ type tailscaleSTSConfig struct { ChildResourceLabels map[string]string ServeConfig *ipn.ServeConfig - // Tailscale target in cluster we are setting up ingress for + + // Cluster target for ingress defined by an IP address ClusterTargetIP string + // Cluster target for ingress defined by a DNS name + ClusterTargetDNS string + // Tailscale IP of a Tailscale service we are setting up egress for TailnetTargetIP string @@ -387,6 +391,11 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S Name: "TS_DEST_IP", Value: sts.ClusterTargetIP, }) + } else if sts.ClusterTargetDNS != "" { + container.Env = append(container.Env, corev1.EnvVar{ + Name: "TS_DEST_DNS", + Value: sts.ClusterTargetDNS, + }) } else if sts.TailnetTargetIP != "" { container.Env = append(container.Env, corev1.EnvVar{ Name: "TS_TAILNET_TARGET_IP", diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go index d6b810e73..fdcb9ba8b 100644 --- a/cmd/k8s-operator/svc.go +++ b/cmd/k8s-operator/svc.go @@ -186,7 +186,11 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga } a.mu.Lock() - if a.shouldExpose(svc) { + if a.shouldExpose(svc) && svc.Spec.Type == corev1.ServiceTypeExternalName { + sts.ClusterTargetDNS = svc.Spec.ExternalName + a.managedIngressProxies.Add(svc.UID) + gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len())) + } else 
if a.shouldExpose(svc) { sts.ClusterTargetIP = svc.Spec.ClusterIP a.managedIngressProxies.Add(svc.UID) gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len())) @@ -279,13 +283,14 @@ func validateService(svc *corev1.Service) []string { violations = append(violations, fmt.Sprintf("invalid value of annotation %s: %q does not appear to be a valid MagicDNS name", AnnotationTailnetTargetFQDN, fqdn)) } } + // TODO: if external name service, check that the name is a DNS name (not an IP address) return violations } func (a *ServiceReconciler) shouldExpose(svc *corev1.Service) bool { // Headless services can't be exposed, since there is no ClusterIP to // forward to. - if svc.Spec.ClusterIP == "" || svc.Spec.ClusterIP == "None" { + if svc.Spec.Type != corev1.ServiceTypeExternalName && (svc.Spec.ClusterIP == "" || svc.Spec.ClusterIP == "None") { return false } diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index fe17198c5..cc904b18e 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -4011,10 +4011,13 @@ func (b *LocalBackend) routerConfig(cfg *wgcfg.Config, prefs ipn.PrefsView, oneC singleRouteThreshold = 1 } - netfilterKind := b.capForcedNetfilter + b.mu.Lock() + netfilterKind := b.capForcedNetfilter // protected by b.mu + b.mu.Unlock() + if prefs.NetfilterKind() != "" { - if b.capForcedNetfilter != "" { - b.logf("nodeattr netfilter preference %s overridden by c2n pref %s", b.capForcedNetfilter, prefs.NetfilterKind()) + if netfilterKind != "" { + b.logf("nodeattr netfilter preference %s overridden by c2n pref %s", netfilterKind, prefs.NetfilterKind()) } netfilterKind = prefs.NetfilterKind() } diff --git a/net/dns/recursive/recursive.go b/net/dns/recursive/recursive.go index eb23004d8..2db244098 100644 --- a/net/dns/recursive/recursive.go +++ b/net/dns/recursive/recursive.go @@ -126,6 +126,16 @@ type Resolver struct { // - DNS-over-HTTPS or DNS-over-TLS support } +// TODO (irbekrm): make these config params into resolver opts? 
+func NewResolverWithRoots(dialer netns.Dialer, log logger.Logf, roots []netip.Addr, noIPv6 bool) *Resolver {
+	return &Resolver{
+		Dialer:      dialer,
+		Logf:        log,
+		NoIPv6:      noIPv6,
+		rootServers: roots,
+	}
+}
+
 // queryState stores all state during the course of a single query
 type queryState struct {
 	// rootServers are the root nameservers to start from
diff --git a/util/linuxfw/iptables_runner.go b/util/linuxfw/iptables_runner.go
index 9211bbd16..72795793c 100644
--- a/util/linuxfw/iptables_runner.go
+++ b/util/linuxfw/iptables_runner.go
@@ -307,6 +307,33 @@ func (i *iptablesRunner) DNATNonTailscaleTraffic(tun string, dst netip.Addr) err
 	return table.Insert("nat", "PREROUTING", 1, "!", "-i", tun, "-j", "DNAT", "--to-destination", dst.String())
 }
 
+// DNATWithLoadBalancer adds DNAT rules to the nat PREROUTING chain that load
+// balance traffic destined for origDst across dsts using round robin.
+// NB: this function clears the nat PREROUTING chain on start, so it is only
+// safe to use on systems where Tailscale is the only process that uses this
+// chain (i.e. containers).
+// It returns an error if dsts is empty.
+func (i *iptablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
+	if len(dsts) == 0 {
+		return fmt.Errorf("no destinations provided for DNAT")
+	}
+	table := i.getIPTByAddr(dsts[0])
+	if err := table.ClearChain("nat", "PREROUTING"); err != nil && !isErrChainNotExist(err) {
+		return fmt.Errorf("error deleting rules from the PREROUTING chain: %w", err)
+	}
+	// For n from len(dsts) down to 2, append a rule that sends every nth
+	// connection reaching it to dsts[n-1]. The counter is named n rather than
+	// i so that it does not shadow the receiver.
+	for n := len(dsts); n >= 2; n-- {
+		dst := dsts[n-1] // the order in which rules for addrs are installed does not matter
+		if err := table.Append("nat", "PREROUTING", "--destination", origDst.String(), "-m", "statistic", "--mode", "nth", "--every", fmt.Sprint(n), "--packet", "0", "-j", "DNAT", "--to-destination", dst.String()); err != nil {
+			return fmt.Errorf("error adding DNAT rule for %s: %w", dst.String(), err)
+		}
+	}
+	// If the packet falls through to this rule, we route to the first destination in the list unconditionally.
+	return table.Append("nat", "PREROUTING", "--destination", origDst.String(), "-j", "DNAT", "--to-destination", dsts[0].String())
+}
+
 func (i *iptablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
 	table := i.getIPTByAddr(addr)
 	return table.Append("mangle", "FORWARD", "-o", tun, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")
diff --git a/util/linuxfw/nftables_runner.go b/util/linuxfw/nftables_runner.go
index 3092b08b5..f199ad8a2 100644
--- a/util/linuxfw/nftables_runner.go
+++ b/util/linuxfw/nftables_runner.go
@@ -10,12 +10,13 @@ import (
 	"encoding/hex"
 	"errors"
 	"fmt"
 	"net"
 	"net/netip"
 	"reflect"
 	"strings"
 
 	"github.com/google/nftables"
+	"github.com/google/nftables/binaryutil"
 	"github.com/google/nftables/expr"
 	"golang.org/x/sys/unix"
 	"tailscale.com/net/tsaddr"
@@ -109,7 +110,6 @@ func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
 		dadderLen = 16
 		fam = unix.NFPROTO_IPV6
 	}
-
 	dnatRule := &nftables.Rule{
 		Table: nat,
 		Chain: preroutingCh,
@@ -139,6 +139,102 @@ func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
 	n.conn.InsertRule(dnatRule)
 	return n.conn.Flush()
 }
+
+// DNATWithLoadBalancer is meant to add rules to the nat prerouting chain
+// that DNAT traffic destined for origDst to one of dsts, picked in turn by
+// an incremental numgen expression. It flushes the nat prerouting chain
+// first. It returns an error if dsts is empty.
+//
+// NOTE(review): this looks unfinished. The map only ever receives a single
+// placeholder element (key 22 -> dsts[0]) and the rule has no lookup or NAT
+// expression yet, so no traffic is actually rewritten.
+func (n *nftablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
+	if len(dsts) == 0 {
+		return errors.New("no destinations provided for DNAT")
+	}
+	nat, preroutingCh, err := n.ensurePreroutingChain(dsts[0])
+	if err != nil {
+		return err
+	}
+	// Delete all rules from the nat prerouting chain.
+	n.conn.FlushChain(preroutingCh)
+
+	addrType := nftables.TypeIPAddr
+	if dsts[0].Is6() {
+		addrType = nftables.TypeIP6Addr
+	}
+
+	set := &nftables.Set{
+		Table:    nat,
+		IsMap:    true,
+		Name:     "addrs",
+		ID:       uint32(1),
+		KeyType:  nftables.TypeInteger,
+		DataType: addrType,
+	}
+	if set, err = createSetIfNotExist(n.conn, set); err != nil {
+		return fmt.Errorf("error ensuring a set: %w", err)
+	}
+	if err := n.conn.Flush(); err != nil {
+		return fmt.Errorf("error flushing after creating set: %w", err)
+	}
+	// TODO: add one element per destination, keyed by the values the numgen
+	// expression below can produce, instead of this single placeholder.
+	element := []nftables.SetElement{
+		{
+			Key: binaryutil.BigEndian.PutUint16(uint16(22)),
+			// AsSlice yields 4 or 16 bytes to match addrType.
+			Val: dsts[0].AsSlice(),
+		},
+	}
+	if err := n.conn.SetAddElements(set, element); err != nil {
+		return fmt.Errorf("error after adding set elements: %w", err)
+	}
+	if err := n.conn.Flush(); err != nil {
+		return fmt.Errorf("error flushing after adding set elements: %w", err)
+	}
+	var daddrOffset, dadderLen uint32
+	if dsts[0].Is4() {
+		daddrOffset = 16
+		dadderLen = 4
+	} else {
+		daddrOffset = 24
+		dadderLen = 16
+	}
+
+	dnatRule := &nftables.Rule{
+		Table: nat,
+		Chain: preroutingCh,
+		Exprs: []expr.Any{
+			// Load the destination address from the network header...
+			&expr.Payload{
+				DestRegister: 1,
+				Base:         expr.PayloadBaseNetworkHeader,
+				Offset:       daddrOffset,
+				Len:          dadderLen,
+			},
+			// ...and only continue if it equals origDst.
+			&expr.Cmp{
+				Op:       expr.CmpOpEq,
+				Register: 1,
+				Data:     origDst.AsSlice(),
+			},
+			&expr.Numgen{
+				Register: 1,
+				Type:     unix.NFT_NG_INCREMENTAL,
+				Modulus:  uint32(len(dsts)),
+			},
+			// TODO: look the numgen value up in the map and add an
+			// expr.NAT of type expr.NATTypeDestNAT that reads the
+			// address from register 1.
+		},
+	}
+	n.conn.AddRule(dnatRule)
+	if err := n.conn.Flush(); err != nil {
+		return fmt.Errorf("error flushing after adding DNAT rule: %w", err)
+	}
+	return nil
+}
 
 func (n *nftablesRunner) DNATNonTailscaleTraffic(tunname string, dst netip.Addr) error {
 	nat, preroutingCh, err := n.ensurePreroutingChain(dst)
@@ -356,6 +452,25 @@ func getTableIfExists(c *nftables.Conn, family nftables.TableFamily, name string
 	return nil, nil
 }
 
+// createSetIfNotExist returns the set in set.Table whose name matches
+// set.Name if one is already listed by connection c, and otherwise adds set
+// via c and returns it.
+func createSetIfNotExist(c *nftables.Conn, set *nftables.Set) (*nftables.Set, error) {
+	sets, err := c.GetSets(set.Table)
+	if err != nil {
+		return nil, fmt.Errorf("error listing sets: %w", err)
+	}
+	for _, s := range sets {
+		if s.Name == set.Name {
+			return s, nil
+		}
+	}
+	if err := c.AddSet(set, nil); err != nil {
+		return nil, fmt.Errorf("error creating a set: %w", err)
+	}
+	return set, nil
+}
+
 // createTableIfNotExist creates a nftables table via connection c if it does
 // not exist within the given family.
 func createTableIfNotExist(c *nftables.Conn, family nftables.TableFamily, name string) (*nftables.Table, error) {
@@ -494,6 +609,14 @@ type NetfilterRunner interface {
 	// to the provided destination, as used in the Kubernetes ingress proxies.
 	AddDNATRule(origDst, dst netip.Addr) error
 
+	// DNATWithLoadBalancer adds a rule to the nat/PREROUTING chain to DNAT
+	// traffic destined for the given original destination to the given new
+	// destination(s) using round robin to load balance if more than one
+	// destination is provided. This is used to forward all traffic destined
+	// for the Tailscale interface to the provided destination(s), as used
+	// in the Kubernetes ingress proxies.
+	DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error
+
 	// AddSNATRuleForDst adds a rule to the nat/POSTROUTING chain to SNAT
 	// traffic destined for dst to src.
// This is used to forward traffic destined for the local machine over @@ -503,7 +622,7 @@ type NetfilterRunner interface { // DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT // all traffic inbound from any interface except exemptInterface to dst. // This is used to forward traffic destined for the local machine over - // the Tailscale interface, as used in the Kubernetes egress proxies.// + // the Tailscale interface, as used in the Kubernetes egress proxies. DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error // ClampMSSToPMTU adds a rule to the mangle/FORWARD chain to clamp MSS for