From e49416783782c27289c7b248d3d37859f46cc2c3 Mon Sep 17 00:00:00 2001
From: Irbe Krumina
Date: Fri, 29 Sep 2023 08:35:02 +0100
Subject: [PATCH] cmd/containerboot,cmd/k8s-operator: ingress/egress proxy can optionally use nftables

Makes it possible for users to make the ingress/egress proxies created
by the operator use nftables by setting PROXY_FIREWALL_MODE=nftables.
Internally this makes use of the same iptables/nftables functionality
used by tailscaled.

Also ensures that the same firewall mode (iptables or nftables) is used
by both tailscaled and proxy ingress/egress rules.

Signed-off-by: Irbe Krumina
---
 cmd/containerboot/ipnft.go     | 605 +++++++++++++++++++++++++++++++++
 cmd/containerboot/main.go      |  99 +++---
 cmd/containerboot/main_test.go |  51 +--
 cmd/k8s-operator/operator.go   |  16 +-
 cmd/k8s-operator/sts.go        |   7 +
 5 files changed, 666 insertions(+), 112 deletions(-)
 create mode 100644 cmd/containerboot/ipnft.go

diff --git a/cmd/containerboot/ipnft.go b/cmd/containerboot/ipnft.go
new file mode 100644
index 000000000..d6313690b
--- /dev/null
+++ b/cmd/containerboot/ipnft.go
@@ -0,0 +1,605 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"net"
+	"net/netip"
+	"os"
+
+	"github.com/coreos/go-iptables/iptables"
+	"github.com/google/nftables"
+	"github.com/google/nftables/expr"
+	"golang.org/x/sys/unix"
+	"tailscale.com/types/logger"
+	"tailscale.com/util/linuxfw"
+	"tailscale.com/wgengine/router"
+)
+
+// The contents of this file are partially adapted from util/linuxfw/iptables_runner.go
+
+const (
+	// Names of the netfilter chains that proxy rules are installed into.
+	postRoutingChain = "POSTROUTING"
+	preroutingChain  = "PREROUTING"
+	forwardChain     = "FORWARD"
+
+	// tailscaleInterface is the exact name of the tailscale network
+	// interface. It must not carry a wildcard suffix: iptables only treats
+	// a trailing "+" as a wildcard, and the nftables rules in this file
+	// compare the full NUL-padded interface name, so "tailscale0*" would
+	// never match the real interface.
+	tailscaleInterface = "tailscale0"
+
+	// iptables target names.
+	snat       = "SNAT"
+	dnat       = "DNAT"
+	masquerade = "MASQUERADE"
+
+	// insertPosition is the rule number passed to iptables Insert calls.
+	insertPosition = 1
+)
+
+// netfilterRunner installs the firewall rules needed by the ingress and egress
+// proxies. It is implemented by iptablesRunner and nftablesRunner.
+type netfilterRunner interface {
+	// addIngressDNAT adds a DNAT rule that rewrites packets destined to
+	// the second address so that they are sent to the first address.
+	addIngressDNAT(netip.Addr, netip.Addr) error
+	// addEgressSNAT adds a masquerade rule for packets destined to the
+	// second address.
+	addEgressSNAT(netip.Addr, netip.Addr) error
+	// addEgressDNAT adds a DNAT rule that sends packets that were not
+	// received on the tailscale interface to the given address.
+	addEgressDNAT(netip.Addr) error
+	// addClamping adds a rule that clamps the MSS of TCP SYN packets
+	// leaving via the tailscale interface.
+	addClamping(netip.Addr) error
+}
+
+// determineProxyFirewallMode returns the firewall mode selected by the
+// TS_FIREWALL_MODE environment variable. "nftables" and "iptables" select
+// that mode directly, "auto" defers to router.ChooseFireWallMode, and an
+// unset or unrecognized value falls back to iptables.
+func determineProxyFirewallMode() linuxfw.FirewallMode {
+	switch v := os.Getenv("TS_FIREWALL_MODE"); v {
+	case "nftables":
+		log.Print("TS_FIREWALL_MODE set to nftables; proxy will use nftables")
+		return linuxfw.FirewallModeNfTables
+	case "auto":
+		m := router.ChooseFireWallMode(logger.FromContext(context.Background()), &router.LinuxFWDetector{})
+		log.Printf("TS_FIREWALL_MODE set to auto; proxy will use %s", m)
+		return m
+	case "iptables":
+		log.Print("TS_FIREWALL_MODE set to iptables; proxy will use iptables")
+		return linuxfw.FirewallModeIPTables
+	case "":
+		log.Print("TS_FIREWALL_MODE is not set; proxy will use iptables")
+		return linuxfw.FirewallModeIPTables
+	default:
+		// Previously reported as "not set", which hid typos in the value.
+		log.Printf("TS_FIREWALL_MODE has unrecognized value %q; proxy will use iptables", v)
+		return linuxfw.FirewallModeIPTables
+	}
+}
+
+// newNetFilterRunner returns the netfilterRunner implementation for the given
+// firewall mode, or an error if the mode is unknown or the runner cannot be
+// constructed.
+func newNetFilterRunner(mode linuxfw.FirewallMode) (netfilterRunner, error) {
+	var nfr netfilterRunner
+	var err error
+	switch mode {
+	case linuxfw.FirewallModeIPTables:
+		log.Print("using iptables to set up proxy rules")
+		nfr, err = newIPTablesRunner(logger.FromContext(context.Background()))
+		if err != nil {
+			return nil, err
+		}
+	case linuxfw.FirewallModeNfTables:
+		log.Print("using nftables to set up proxy rules")
+		nfr, err = newNfTablesRunner(logger.FromContext(context.Background()))
+		if err != nil {
+			// Return an untyped nil so callers never see a non-nil
+			// interface wrapping a nil *nftablesRunner.
+			return nil, err
+		}
+	default:
+		return nil, fmt.Errorf("unknown firewall mode: %v", mode)
+	}
+	return nfr, nil
+}
+
+// newIPTablesRunner constructs a netfilterRunner that programs iptables rules.
+// If the underlying iptables library fails to initialize, that error is
+// returned. The runner probes for IPv6 support once at initialization time and
+// if not found, no IPv6 rules will be modified for the lifetime of the runner.
+func newIPTablesRunner(logf logger.Logf) (netfilterRunner, error) {
+	v4, err := iptables.NewWithProtocol(iptables.ProtocolIPv4)
+	if err != nil {
+		return nil, err
+	}
+
+	// Both probes always run; the first one that failed decides which
+	// message is logged.
+	ipv6Err := linuxfw.CheckIPv6(logf)
+	ip6tablesErr := linuxfw.CheckIP6TablesExists()
+
+	var haveV6, haveV6NAT bool
+	if ipv6Err != nil {
+		logf("disabling tunneled IPv6 due to system IPv6 config: %v", ipv6Err)
+	} else if ip6tablesErr != nil {
+		logf("disabling tunneled IPv6 due to missing ip6tables: %v", ip6tablesErr)
+	} else {
+		haveV6 = true
+		haveV6NAT = linuxfw.CheckSupportsV6NAT()
+		logf("v6nat = %v", haveV6NAT)
+	}
+
+	// v6 stays nil when IPv6 is unavailable; users of the runner must
+	// consult v6Available before touching ipt6.
+	var v6 *iptables.IPTables
+	if haveV6 {
+		if v6, err = iptables.NewWithProtocol(iptables.ProtocolIPv6); err != nil {
+			return nil, err
+		}
+	}
+	return &iptablesRunner{
+		ipt4:           v4,
+		ipt6:           v6,
+		v6Available:    haveV6,
+		v6NATAvailable: haveV6NAT,
+	}, nil
+}
+
+// iptablesI is the subset of the iptables client used by iptablesRunner.
+type iptablesI interface {
+	Insert(string, string, int, ...string) error
+	Append(string, string, ...string) error
+}
+
+// iptablesRunner is the iptables-backed implementation of netfilterRunner.
+type iptablesRunner struct {
+	ipt4 iptablesI
+	ipt6 iptablesI
+
+	// v6Available and v6NATAvailable record the IPv6 probes made when the
+	// runner was constructed.
+	v6Available    bool
+	v6NATAvailable bool
+}
+
+// getIPTByAddrForTable returns the iptables client for the IP family of the
+// given address, or an error if that family cannot be used for the table.
+func (i *iptablesRunner) getIPTByAddrForTable(addr netip.Addr, table string) (iptablesI, error) {
+	// IPv4 needs no capability checks.
+	if !addr.Is6() {
+		return i.ipt4, nil
+	}
+	if !i.v6Available {
+		return nil, fmt.Errorf("ipv6 address requested %v, but ipv6 iptables are not available", addr)
+	}
+	if !i.v6NATAvailable && table == "nat" {
+		return nil, fmt.Errorf("ipv6 address requested %v, but system does not support nat for ipv6 iptables", addr)
+	}
+	return i.ipt6, nil
+}
+
+// addIngressDNAT inserts a nat PREROUTING rule that DNATs packets destined to
+// destinationFilter to destination.
+func (i *iptablesRunner) addIngressDNAT(destination netip.Addr, destinationFilter netip.Addr) error {
+	ipt, err := i.getIPTByAddrForTable(destination, "nat")
+	if err != nil {
+		return fmt.Errorf("error setting up iptables for ingress DNAT: %w", err)
+	}
+	rule := []string{"-d", destinationFilter.String(), "-j", dnat, "--to-destination", destination.String()}
+	return ipt.Insert("nat", preroutingChain, insertPosition, rule...)
+}
+
+// addEgressDNAT inserts a nat PREROUTING rule that DNATs every packet not
+// received on the tailscale interface to destination.
+func (i *iptablesRunner) addEgressDNAT(destination netip.Addr) error {
+	ipt, err := i.getIPTByAddrForTable(destination, "nat")
+	if err != nil {
+		return fmt.Errorf("error setting up iptables for egress DNAT: %w", err)
+	}
+	rule := []string{"!", "-i", tailscaleInterface, "-j", dnat, "--to-destination", destination.String()}
+	return ipt.Insert("nat", preroutingChain, insertPosition, rule...)
+}
+
+// addEgressSNAT inserts a nat POSTROUTING rule that masquerades packets
+// destined to destinationFilter. source is only used to pick the IP family.
+func (i *iptablesRunner) addEgressSNAT(source, destinationFilter netip.Addr) error {
+	ipt, err := i.getIPTByAddrForTable(source, "nat")
+	if err != nil {
+		return fmt.Errorf("error setting up iptables for egress SNAT: %w", err)
+	}
+	rule := []string{"--destination", destinationFilter.String(), "-j", masquerade}
+	return ipt.Insert("nat", postRoutingChain, insertPosition, rule...)
+}
+
+// addClamping appends a mangle FORWARD rule that clamps the MSS of TCP SYN
+// packets leaving via the tailscale interface. addr is only used to pick the
+// IP family.
+func (i *iptablesRunner) addClamping(addr netip.Addr) error {
+	ipt, err := i.getIPTByAddrForTable(addr, "mangle")
+	if err != nil {
+		return fmt.Errorf("error setting up iptables for clamping: %w", err)
+	}
+	rule := []string{"-o", tailscaleInterface, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu"}
+	return ipt.Append("mangle", forwardChain, rule...)
+}
+
+// connI declares the rule-insertion and flush methods of an nftables
+// connection.
+type connI interface {
+	InsertRule(*nftables.Rule) *nftables.Rule
+	Flush() error
+}
+
+// nftablesRunner is the nftables-backed implementation of netfilterRunner.
+type nftablesRunner struct {
+	conn linuxfw.Conn
+	nft4 *nftableFamily
+	nft6 *nftableFamily // nil when IPv6 is not available
+
+	v6Available    bool
+	v6NATAvailable bool
+}
+
+// nftableFamily groups the tables the runner has set up for one IP family.
+type nftableFamily struct {
+	Proto  nftables.TableFamily
+	Nat    *nftables.Table
+	Filter *nftables.Table
+}
+
+// newNfTablesRunner opens an nftables connection and probes IPv6 support once;
+// the result of the probe is stored on the returned runner.
+func newNfTablesRunner(logf logger.Logf) (*nftablesRunner, error) {
+	c, err := nftables.New()
+	if err != nil {
+		return nil, fmt.Errorf("nftables connection: %w", err)
+	}
+	r := &nftablesRunner{
+		conn: c,
+		nft4: &nftableFamily{Proto: nftables.TableFamilyIPv4},
+	}
+
+	if v6err := linuxfw.CheckIPv6(logf); v6err != nil {
+		logf("disabling tunneled IPv6 due to system IPv6 config: %v", v6err)
+		return r, nil
+	}
+	r.v6Available = true
+	r.v6NATAvailable = linuxfw.CheckSupportsV6NAT()
+	logf("v6nat availability: %v", r.v6NATAvailable)
+	r.nft6 = &nftableFamily{Proto: nftables.TableFamilyIPv6}
+	return r, nil
+}
+
+// getNFTByAddrForTable returns the nftableFamily for the IP family of the
+// given address, or an error if that family cannot be used for the table.
+func (n *nftablesRunner) getNFTByAddrForTable(addr netip.Addr, table string) (*nftableFamily, error) {
+	if !addr.Is6() {
+		return n.nft4, nil
+	}
+	if !n.v6Available {
+		return nil, fmt.Errorf("ipv6 address in use, but ipv6 nftables are not available")
+	}
+	if !n.v6NATAvailable && table == "nat" {
+		return nil, fmt.Errorf("ipv6 address in use, but ipv6 nftables modules for nat are not available")
+	}
+	return n.nft6, nil
+}
+
+// getTables returns the nftableFamily values usable by this runner: IPv4, plus
+// IPv6 when it is available.
+func (n *nftablesRunner) getTables() []*nftableFamily {
+	if n.v6Available {
+		return []*nftableFamily{n.nft4, n.nft6}
+	}
+	return []*nftableFamily{n.nft4}
+}
+
+// addIngressDNAT ensures the nat table and its PREROUTING chain exist and
+// inserts a rule that DNATs packets whose destination address equals
+// destinationFilter to destination. It returns any error reported when the
+// change is flushed to the kernel.
+func (n *nftablesRunner) addIngressDNAT(destination netip.Addr, destinationFilter netip.Addr) error {
+	polAccept := nftables.ChainPolicyAccept
+	table, err := n.getNFTByAddrForTable(destination, "nat")
+	if err != nil {
+		return err
+	}
+	nat, err := linuxfw.CreateTableIfNotExist(n.conn, table.Proto, "nat")
+	if err != nil {
+		return fmt.Errorf("error ensuring nat table: %w", err)
+	}
+	table.Nat = nat
+
+	// ensure prerouting chain exists
+	var preroutingCh *nftables.Chain
+	if preroutingCh, err = linuxfw.CreateChainIfNotExist(n.conn, linuxfw.ChainInfo{
+		Table:         nat,
+		Name:          preroutingChain,
+		ChainType:     nftables.ChainTypeNAT,
+		ChainHook:     nftables.ChainHookPrerouting,
+		ChainPriority: nftables.ChainPriorityNATDest,
+		ChainPolicy:   &polAccept,
+	}); err != nil {
+		return fmt.Errorf("error ensuring prerouting chain: %w", err)
+	}
+
+	daddrOffset, err := daddrOffsetForFam(table.Proto)
+	if err != nil {
+		return fmt.Errorf("error determining destination address offset: %w", err)
+	}
+	daddrLen, err := ipAddressLenForFam(table.Proto)
+	if err != nil {
+		return fmt.Errorf("error determining ip address length: %w", err)
+	}
+	famConst, err := nfFamilyConst(table.Proto)
+	if err != nil {
+		return fmt.Errorf("error determining ip family: %w", err)
+	}
+	daddr, err := ipForFam(table.Proto, destinationFilter)
+	if err != nil {
+		return fmt.Errorf("error parsing destination IP address: %w", err)
+	}
+	clusterDaddr, err := ipForFam(table.Proto, destination)
+	if err != nil {
+		return fmt.Errorf("error parsing cluster destination IP address: %w", err)
+	}
+
+	dnatRule := &nftables.Rule{
+		Table: nat,
+		Chain: preroutingCh,
+		Exprs: []expr.Any{
+			// Load the packet's destination address and compare
+			// it against destinationFilter.
+			&expr.Payload{
+				DestRegister: 1,
+				Base:         expr.PayloadBaseNetworkHeader,
+				Offset:       daddrOffset,
+				Len:          daddrLen,
+			},
+			&expr.Cmp{
+				Op:       expr.CmpOpEq,
+				Register: 1,
+				Data:     daddr,
+			},
+			// Load the new destination and DNAT to it.
+			&expr.Immediate{
+				Register: 1,
+				Data:     clusterDaddr,
+			},
+			&expr.NAT{
+				Type:       expr.NATTypeDestNAT,
+				Family:     famConst,
+				RegAddrMin: 1,
+			},
+		},
+	}
+	n.conn.InsertRule(dnatRule)
+	if err := n.conn.Flush(); err != nil {
+		return fmt.Errorf("error flushing ingress DNAT rule: %w", err)
+	}
+
+	return nil
+}
+
+// addEgressDNAT ensures the nat table and its PREROUTING chain exist and adds
+// a rule that DNATs every packet that was not received on the tailscale
+// interface to destination. It returns any error reported when the change is
+// flushed to the kernel.
+func (n *nftablesRunner) addEgressDNAT(destination netip.Addr) error {
+	polAccept := nftables.ChainPolicyAccept
+	table, err := n.getNFTByAddrForTable(destination, "nat")
+	if err != nil {
+		return err
+	}
+	nat, err := linuxfw.CreateTableIfNotExist(n.conn, table.Proto, "nat")
+	if err != nil {
+		return fmt.Errorf("error ensuring nat table exists: %w", err)
+	}
+	table.Nat = nat
+
+	// ensure prerouting chain exists
+	var preroutingCh *nftables.Chain
+	if preroutingCh, err = linuxfw.CreateChainIfNotExist(n.conn, linuxfw.ChainInfo{
+		Table:         nat,
+		Name:          preroutingChain,
+		ChainType:     nftables.ChainTypeNAT,
+		ChainHook:     nftables.ChainHookPrerouting,
+		ChainPriority: nftables.ChainPriorityNATDest,
+		ChainPolicy:   &polAccept,
+	}); err != nil {
+		return fmt.Errorf("error ensuring prerouting chain: %w", err)
+	}
+	famConst, err := nfFamilyConst(table.Proto)
+	if err != nil {
+		return fmt.Errorf("error determining ip family: %w", err)
+	}
+	ip, err := ipForFam(table.Proto, destination)
+	if err != nil {
+		return fmt.Errorf("error parsing IP address: %w", err)
+	}
+
+	dnatRule := &nftables.Rule{
+		Table: nat,
+		Chain: preroutingCh,
+		Exprs: []expr.Any{
+			// Match on the *input* interface, mirroring the
+			// iptables rule "! -i <tailscale interface>". The
+			// output interface has not been chosen yet when the
+			// prerouting hook runs.
+			&expr.Meta{Key: expr.MetaKeyIIFNAME, Register: 1},
+			&expr.Cmp{
+				Op:       expr.CmpOpNeq,
+				Register: 1,
+				Data:     ifname(tailscaleInterface),
+			},
+			&expr.Immediate{
+				Register: 1,
+				Data:     ip,
+			},
+			&expr.NAT{
+				Type:       expr.NATTypeDestNAT,
+				Family:     famConst,
+				RegAddrMin: 1,
+			},
+		},
+	}
+	// Tailnet egress IP is passed to the proxy as an env var- that means
+	// that if it ever changes Pods will be restarted, so we don't have to
+	// worry about multiple rules with different destination addresses
+	n.conn.AddRule(dnatRule)
+	if err := n.conn.Flush(); err != nil {
+		return fmt.Errorf("error flushing egress DNAT rule: %w", err)
+	}
+	return nil
+}
+
+// addEgressSNAT ensures the nat table and its POSTROUTING chain exist and adds
+// a rule that masquerades packets whose destination address equals
+// destinationFilter. source is not used by the rule. It returns any error
+// reported when the change is flushed to the kernel.
+func (n *nftablesRunner) addEgressSNAT(source, destinationFilter netip.Addr) error {
+	polAccept := nftables.ChainPolicyAccept
+	table, err := n.getNFTByAddrForTable(destinationFilter, "nat")
+	if err != nil {
+		return err
+	}
+	nat, err := linuxfw.CreateTableIfNotExist(n.conn, table.Proto, "nat")
+	if err != nil {
+		return fmt.Errorf("error ensuring nat table exists: %w", err)
+	}
+	table.Nat = nat
+
+	// ensure postrouting chain exists
+	var postRoutingCh *nftables.Chain
+	if postRoutingCh, err = linuxfw.CreateChainIfNotExist(n.conn, linuxfw.ChainInfo{
+		Table:         nat,
+		Name:          postRoutingChain,
+		ChainType:     nftables.ChainTypeNAT,
+		ChainHook:     nftables.ChainHookPostrouting,
+		ChainPriority: nftables.ChainPriorityNATSource,
+		ChainPolicy:   &polAccept,
+	}); err != nil {
+		return fmt.Errorf("error ensuring postrouting chain: %w", err)
+	}
+
+	daddrOffset, err := daddrOffsetForFam(table.Proto)
+	if err != nil {
+		return fmt.Errorf("error determining destination address offset: %w", err)
+	}
+	daddrLen, err := ipAddressLenForFam(table.Proto)
+	if err != nil {
+		return fmt.Errorf("error determining ip address length: %w", err)
+	}
+	ip, err := ipForFam(table.Proto, destinationFilter)
+	if err != nil {
+		return fmt.Errorf("error parsing ip address: %w", err)
+	}
+
+	snatRule := &nftables.Rule{
+		Table: nat,
+		Chain: postRoutingCh,
+		Exprs: []expr.Any{
+			// Load the packet's destination address and compare
+			// it against destinationFilter.
+			&expr.Payload{
+				DestRegister: 1,
+				Base:         expr.PayloadBaseNetworkHeader,
+				Offset:       daddrOffset,
+				Len:          daddrLen,
+			},
+			&expr.Cmp{
+				Op:       expr.CmpOpEq,
+				Register: 1,
+				Data:     ip,
+			},
+			&expr.Masq{},
+		},
+	}
+	// Tailnet egress IP is passed to the proxy as an env var- that means
+	// that if it ever changes Pods will be restarted, so we don't have to
+	// worry about multiple rules with different destination addresses
+	n.conn.AddRule(snatRule)
+	if err := n.conn.Flush(); err != nil {
+		return fmt.Errorf("error flushing egress SNAT rule: %w", err)
+	}
+	return nil
+}
+
+// addClamping ensures, for every available IP family, that the filter table
+// and its FORWARD chain exist and adds a rule that sets the TCP MSS option of
+// SYN packets leaving via the tailscale interface to the route's MSS. The
+// address argument is ignored. It returns any error reported when the changes
+// are flushed to the kernel.
+func (n *nftablesRunner) addClamping(_ netip.Addr) error {
+	polAccept := nftables.ChainPolicyAccept
+
+	for _, fam := range n.getTables() {
+		filterTable, err := linuxfw.CreateTableIfNotExist(n.conn, fam.Proto, "filter")
+		if err != nil {
+			return fmt.Errorf("error ensuring filter table: %w", err)
+		}
+		fam.Filter = filterTable
+
+		// ensure forwarding chain exists
+		var fwChain *nftables.Chain
+
+		if fwChain, err = linuxfw.CreateChainIfNotExist(n.conn, linuxfw.ChainInfo{
+			Table:         filterTable,
+			Name:          forwardChain,
+			ChainType:     nftables.ChainTypeFilter,
+			ChainHook:     nftables.ChainHookForward,
+			ChainPriority: nftables.ChainPriorityFilter,
+			ChainPolicy:   &polAccept,
+		}); err != nil {
+			return fmt.Errorf("error ensuring forward chain: %w", err)
+		}
+		tcpFlagsOffset, err := tcpFlagsOffsetForFam(fam.Proto)
+		if err != nil {
+			return fmt.Errorf("error determining TCP flags offset: %w", err)
+		}
+
+		clampRule := &nftables.Rule{
+			Table: filterTable,
+			Chain: fwChain,
+			Exprs: []expr.Any{
+				// oifname == tailscale interface
+				&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
+				&expr.Cmp{
+					Op:       expr.CmpOpEq,
+					Register: 1,
+					Data:     ifname(tailscaleInterface),
+				},
+				// l4proto == tcp
+				&expr.Meta{Key: expr.MetaKeyL4PROTO, Register: 1},
+				&expr.Cmp{
+					Op:       expr.CmpOpEq,
+					Register: 1,
+					Data:     []byte{unix.IPPROTO_TCP},
+				},
+				// TCP flags byte has the SYN bit (0x02) set
+				&expr.Payload{
+					DestRegister: 1,
+					Base:         expr.PayloadBaseTransportHeader,
+					Offset:       tcpFlagsOffset,
+					Len:          1,
+				},
+				&expr.Bitwise{
+					DestRegister:   1,
+					SourceRegister: 1,
+					Len:            1,
+					Mask:           []byte{0x02},
+					Xor:            []byte{0x00},
+				},
+				&expr.Cmp{
+					Op:       expr.CmpOpNeq,
+					Register: 1,
+					Data:     []byte{0x00},
+				},
+				// Load the route's TCP MSS, convert it to network
+				// byte order and write it into TCP option 2.
+				&expr.Rt{
+					Register: 1,
+					Key:      expr.RtTCPMSS,
+				},
+				&expr.Byteorder{
+					DestRegister:   1,
+					SourceRegister: 1,
+					Op:             expr.ByteorderHton,
+					Len:            2,
+					Size:           2,
+				},
+				&expr.Exthdr{
+					SourceRegister: 1,
+					Type:           2,
+					Offset:         2,
+					Len:            2,
+					Op:             expr.ExthdrOpTcpopt,
+				},
+			},
+		}
+		n.conn.AddRule(clampRule)
+	}
+	if err := n.conn.Flush(); err != nil {
+		return fmt.Errorf("error flushing clamping rules: %w", err)
+	}
+	return nil
+}
+
+// daddrOffsetForFam returns the byte offset of the destination address within
+// the network header of the given family: 16 for IPv4 and 24 for IPv6 (the
+// IPv6 header is 8 fixed bytes followed by the 16 byte source address).
+func daddrOffsetForFam(proto nftables.TableFamily) (uint32, error) {
+	switch proto {
+	case nftables.TableFamilyIPv4:
+		return 16, nil
+	case nftables.TableFamilyIPv6:
+		return 24, nil
+	default:
+		return 0, fmt.Errorf("table family %v is neither IPv4 nor IPv6", proto)
+	}
+}
+
+// ipAddressLenForFam returns the length in bytes of an address of the given
+// family.
+func ipAddressLenForFam(proto nftables.TableFamily) (uint32, error) {
+	switch proto {
+	case nftables.TableFamilyIPv4:
+		return 4, nil
+	case nftables.TableFamilyIPv6:
+		return 16, nil
+	default:
+		return 0, fmt.Errorf("table family %v is neither IPv4 nor IPv6", proto)
+	}
+}
+
+// nfFamilyConst returns the NFPROTO_* constant for the given table family.
+func nfFamilyConst(proto nftables.TableFamily) (uint32, error) {
+	switch proto {
+	case nftables.TableFamilyIPv4:
+		return unix.NFPROTO_IPV4, nil
+	case nftables.TableFamilyIPv6:
+		return unix.NFPROTO_IPV6, nil
+	default:
+		return 0, fmt.Errorf("table family %v is neither IPv4 nor IPv6", proto)
+	}
+}
+
+// tcpFlagsOffsetForFam returns the byte offset of the TCP flags field relative
+// to the start of the transport header. Callers load it with
+// expr.PayloadBaseTransportHeader, so the offset is 13 for both families; the
+// family is only validated.
+func tcpFlagsOffsetForFam(proto nftables.TableFamily) (uint32, error) {
+	switch proto {
+	case nftables.TableFamilyIPv4, nftables.TableFamilyIPv6:
+		return 13, nil
+	default:
+		return 0, fmt.Errorf("table family %v is neither IPv4 nor IPv6", proto)
+	}
+}
+
+// ipForFam converts ip to a net.IP in the 4 byte (IPv4) or 16 byte (IPv6)
+// form matching the given table family.
+func ipForFam(proto nftables.TableFamily, ip netip.Addr) (net.IP, error) {
+	switch proto {
+	case nftables.TableFamilyIPv4:
+		return net.ParseIP(ip.String()).To4(), nil
+	case nftables.TableFamilyIPv6:
+		return net.ParseIP(ip.String()).To16(), nil
+	default:
+		return nil, fmt.Errorf("table family %v is neither IPv4 nor IPv6", proto)
+	}
+}
+
+// ifname returns n as a NUL-padded 16 byte buffer, suitable for an exact
+// comparison against an interface name loaded by an nftables meta expression.
+func ifname(n string) []byte {
+	b := make([]byte, 16)
+	copy(b, []byte(n+"\x00"))
+	return b
+}
diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go
index 893495063..377ffddb3 100644
--- a/cmd/containerboot/main.go
+++ b/cmd/containerboot/main.go
@@ -105,6 +105,7 @@ func main() {
 		Socket:       defaultEnv("TS_SOCKET", "/tmp/tailscaled.sock"),
 		AuthOnce:     defaultBool("TS_AUTH_ONCE", false),
 		Root:         defaultEnv("TS_TEST_ONLY_ROOT", "/"),
+		FirewallMode: defaultEnv("TS_FIREWALL_MODE", ""),
 	}
 
 	if cfg.ProxyTo != "" && cfg.UserspaceMode {
@@ -173,6 +174,13 @@ func main() {
 		}
 	}
 
+	// determine firewall mode once here to ensure that tailscaled and proxy
+	// code use the same
+	mode := determineProxyFirewallMode()
+	if err := os.Setenv("TS_DEBUG_FIREWALL_MODE", string(mode)); err != nil {
+		log.Fatalf("error setting TS_DEBUG_FIREWALL_MODE: %v", err)
+	}
+
 	client, daemonPid, err := startTailscaled(bootCtx, cfg)
 	if err != nil {
 		log.Fatalf("failed to bring up tailscale: %v", err)
@@ -286,6 +294,13 @@ authLoop:
 	if cfg.ServeConfigPath != "" {
 		go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client)
 	}
+	var nfr netfilterRunner
+	if wantProxy {
+		nfr, err = newNetFilterRunner(mode)
+		if err != nil {
+			log.Fatalf("error creating new netfilter runner: %v", err)
+		}
+	}
 	for {
 		n, err := w.Next()
 		if err != nil {
@@ -305,8 +320,8 @@ authLoop:
 			newCurrentIPs := deephash.Hash(&addrs)
 			ipsHaveChanged := newCurrentIPs != currentIPs
 			if cfg.ProxyTo != "" && len(addrs) > 0 && ipsHaveChanged {
-				log.Printf("Installing proxy rules")
-				if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs); err != nil {
+				log.Printf("Installing ingress proxy rules")
+				if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs, nfr); err != nil {
 					log.Fatalf("installing ingress proxy rules: %v", err)
 				}
 			}
@@ -321,7 +336,8 @@ authLoop:
 				}
 			}
 			if cfg.TailnetTargetIP != "" && ipsHaveChanged && len(addrs) > 0 {
-				if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs); err != nil {
+				log.Printf("Installing egress proxy rules")
+				if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs, nfr); err != nil {
 					log.Fatalf("installing egress proxy rules: %v", err)
 				}
 			}
@@ -653,16 +669,12 @@ func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, routes string
 	return nil
 }
 
-func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix) error {
+func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr netfilterRunner) error {
 	dst, err := netip.ParseAddr(dstStr)
 	if err != nil {
 		return err
 	}
-	argv0 := "iptables"
-	if dst.Is6() {
-		argv0 = "ip6tables"
-	}
-	var local string
+	var local netip.Addr
for _, pfx := range tsIPs { if !pfx.IsSingleIP() { continue @@ -670,52 +682,34 @@ func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []net if pfx.Addr().Is4() != dst.Is4() { continue } - local = pfx.Addr().String() + local = pfx.Addr() break } - if local == "" { + + if local.String() == "" { return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs) } - // Technically, if the control server ever changes the IPs assigned to this - // node, we'll slowly accumulate iptables rules. This shouldn't happen, so - // for now we'll live with it. - // Set up a rule that ensures that all packets - // except for those received on tailscale0 interface is forwarded to - // destination address - cmdDNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "!", "-i", "tailscale0", "-j", "DNAT", "--to-destination", dstStr) - cmdDNAT.Stdout = os.Stdout - cmdDNAT.Stderr = os.Stderr - if err := cmdDNAT.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + err = nfr.addEgressDNAT(dst) + if err != nil { + return fmt.Errorf("error setting up egress DNAT: %w", err) } - // Set up a rule that ensures that all packets sent to the destination - // address will have the proxy's IP set as source IP - cmdSNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "POSTROUTING", "1", "--destination", dstStr, "-j", "SNAT", "--to-source", local) - cmdSNAT.Stdout = os.Stdout - cmdSNAT.Stderr = os.Stderr - if err := cmdSNAT.Run(); err != nil { - return fmt.Errorf("setting up SNAT via iptables failed: %w", err) + err = nfr.addEgressSNAT(local, dst) + if err != nil { + return fmt.Errorf("error setting up egress SNAT: %w", err) + } + if err := nfr.addClamping(dst); err != nil { + return fmt.Errorf("error setting up clamping rule: %w", err) } - cmdClamp := exec.CommandContext(ctx, argv0, "-t", "mangle", "-A", "FORWARD", "-o", "tailscale0", "-p", "tcp", "-m", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", 
"TCPMSS", "--clamp-mss-to-pmtu") - cmdClamp.Stdout = os.Stdout - cmdClamp.Stderr = os.Stderr - if err := cmdClamp.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) - } return nil } -func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix) error { +func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr netfilterRunner) error { dst, err := netip.ParseAddr(dstStr) if err != nil { return err } - argv0 := "iptables" - if dst.Is6() { - argv0 = "ip6tables" - } - var local string + var local netip.Addr for _, pfx := range tsIPs { if !pfx.IsSingleIP() { continue @@ -723,26 +717,18 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne if pfx.Addr().Is4() != dst.Is4() { continue } - local = pfx.Addr().String() + local = pfx.Addr() break } - if local == "" { + if local.String() == "" { return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs) } - // Technically, if the control server ever changes the IPs assigned to this - // node, we'll slowly accumulate iptables rules. This shouldn't happen, so - // for now we'll live with it. 
- cmd := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "-d", local, "-j", "DNAT", "--to-destination", dstStr) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + err = nfr.addIngressDNAT(dst, local) + if err != nil { + return fmt.Errorf("error setting up ingress dnat: %w", err) } - cmdClamp := exec.CommandContext(ctx, argv0, "-t", "mangle", "-A", "FORWARD", "-o", "tailscale0", "-p", "tcp", "-m", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu") - cmdClamp.Stdout = os.Stdout - cmdClamp.Stderr = os.Stderr - if err := cmdClamp.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + if err = nfr.addClamping(dst); err != nil { + return fmt.Errorf("error setting clamping rule: %w", err) } return nil } @@ -774,6 +760,7 @@ type settings struct { AuthOnce bool Root string KubernetesCanPatch bool + FirewallMode string } // defaultEnv returns the value of the given envvar name, or defVal if diff --git a/cmd/containerboot/main_test.go b/cmd/containerboot/main_test.go index 2561e6724..09aec3270 100644 --- a/cmd/containerboot/main_test.go +++ b/cmd/containerboot/main_test.go @@ -311,55 +311,6 @@ func TestContainerBoot(t *testing.T) { }, }, }, - { - Name: "ingres proxy", - Env: map[string]string{ - "TS_AUTHKEY": "tskey-key", - "TS_DEST_IP": "1.2.3.4", - "TS_USERSPACE": "false", - }, - Phases: []phase{ - { - WantCmds: []string{ - "/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=mem: --statedir=/tmp", - "/usr/bin/tailscale --socket=/tmp/tailscaled.sock login --authkey=tskey-key", - }, - }, - { - Notify: runningNotify, - WantCmds: []string{ - "/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false", - "/usr/bin/iptables -t nat -I PREROUTING 1 -d 100.64.0.1 -j DNAT --to-destination 1.2.3.4", - "/usr/bin/iptables -t mangle -A FORWARD -o tailscale0 -p tcp -m tcp --tcp-flags SYN,RST SYN -j 
TCPMSS --clamp-mss-to-pmtu", - }, - }, - }, - }, - { - Name: "egress proxy", - Env: map[string]string{ - "TS_AUTHKEY": "tskey-key", - "TS_TAILNET_TARGET_IP": "100.99.99.99", - "TS_USERSPACE": "false", - }, - Phases: []phase{ - { - WantCmds: []string{ - "/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=mem: --statedir=/tmp", - "/usr/bin/tailscale --socket=/tmp/tailscaled.sock login --authkey=tskey-key", - }, - }, - { - Notify: runningNotify, - WantCmds: []string{ - "/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false", - "/usr/bin/iptables -t nat -I PREROUTING 1 ! -i tailscale0 -j DNAT --to-destination 100.99.99.99", - "/usr/bin/iptables -t nat -I POSTROUTING 1 --destination 100.99.99.99 -j SNAT --to-source 100.64.0.1", - "/usr/bin/iptables -t mangle -A FORWARD -o tailscale0 -p tcp -m tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu", - }, - }, - }, - }, { Name: "authkey_once", Env: map[string]string{ @@ -1092,3 +1043,5 @@ func (k *kubeServer) serveSecret(w http.ResponseWriter, r *http.Request) { panic(fmt.Sprintf("unhandled HTTP method %q", r.Method)) } } + +// TODO (irbekrm): add separate tests for installIngressForwardingRule/installEgressForwardingRule diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 6ddee7efb..99c6be176 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -47,11 +47,12 @@ func main() { tailscale.I_Acknowledge_This_API_Is_Unstable = true var ( - tsNamespace = defaultEnv("OPERATOR_NAMESPACE", "") - tslogging = defaultEnv("OPERATOR_LOGGING", "info") - image = defaultEnv("PROXY_IMAGE", "tailscale/tailscale:latest") - priorityClassName = defaultEnv("PROXY_PRIORITY_CLASS_NAME", "") - tags = defaultEnv("PROXY_TAGS", "tag:k8s") + tsNamespace = defaultEnv("OPERATOR_NAMESPACE", "") + tslogging = defaultEnv("OPERATOR_LOGGING", "info") + image = defaultEnv("PROXY_IMAGE", "tailscale/tailscale:latest") + priorityClassName = defaultEnv("PROXY_PRIORITY_CLASS_NAME", 
"") + tags = defaultEnv("PROXY_TAGS", "tag:k8s") + tsDebugFirewallMode = defaultEnv("PROXY_FIREWALL_MODE", "") ) var opts []kzap.Opts @@ -70,7 +71,7 @@ func main() { defer s.Close() restConfig := config.GetConfigOrDie() maybeLaunchAPIServerProxy(zlog, restConfig, s) - runReconcilers(zlog, s, tsNamespace, restConfig, tsClient, image, priorityClassName, tags) + runReconcilers(zlog, s, tsNamespace, restConfig, tsClient, image, priorityClassName, tags, tsDebugFirewallMode) } // initTSNet initializes the tsnet.Server and logs in to Tailscale. It uses the @@ -179,7 +180,7 @@ waitOnline: // runReconcilers starts the controller-runtime manager and registers the // ServiceReconciler. It blocks forever. -func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string, restConfig *rest.Config, tsClient *tailscale.Client, image, priorityClassName, tags string) { +func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string, restConfig *rest.Config, tsClient *tailscale.Client, image, priorityClassName, tags, firewallMode string) { var ( isDefaultLoadBalancer = defaultBool("OPERATOR_DEFAULT_LOAD_BALANCER", false) ) @@ -228,6 +229,7 @@ func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string operatorNamespace: tsNamespace, proxyImage: image, proxyPriorityClassName: priorityClassName, + firewallMode: firewallMode, } err = builder. ControllerManagedBy(mgr). diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index 90d5b1d46..d9e2ac045 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -79,6 +79,7 @@ type tailscaleSTSReconciler struct { operatorNamespace string proxyImage string proxyPriorityClassName string + firewallMode string } // IsHTTPSEnabledOnTailnet reports whether HTTPS is enabled on the tailnet. 
@@ -319,6 +320,12 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S Name: "TS_HOSTNAME", Value: sts.Hostname, }) + if a.firewallMode != "" { + container.Env = append(container.Env, corev1.EnvVar{ + Name: "TS_FIREWALL_MODE", + Value: a.firewallMode, + }) + } if sts.ClusterTargetIP != "" { container.Env = append(container.Env, corev1.EnvVar{ Name: "TS_DEST_IP",