tailscale/util/linuxfw/linuxfw.go
Mike O'Driscoll a058d04afb cmd,ipn,util,wgengine: add --exit-node-allow-wan-ports flag for incoming WAN connections
Add a new `tailscale set` flag that allows incoming WAN connections on
specified proto:port pairs to bypass exit node routing. When a node uses
an exit node, reply traffic for externally-initiated connections gets
captured by the exit node's default route, breaking any service the
machine hosts on its public IP but preserving privacy. This flag
installs port-specific conntrack-based firewall rules that mark
replies to matching inbound connections with the Tailscale bypass
fwmark (0x80000), causing them to route through the main table instead
of the exit node tunnel.

Usage: tailscale set --exit-node-allow-wan-ports=tcp:22,tcp:443 --accept-risk=wan-bypass

For each proto:port entry, two firewall rules are created:
- mangle/PREROUTING: tags connections on non-tailscale interfaces for
  the specified destination port with the bypass conntrack mark. Matches
  all connection states (not just NEW) so existing connections get
  tagged when rules are installed after an exit node is activated.
- mangle/OUTPUT: sets the bypass fwmark on ESTABLISHED/RELATED replies
  (matched by source port) so they route via the physical interface.

WAN bypass rules are installed before routes in the router's Set()
method to avoid a window where the exit node route is active but no
bypass rules exist, which would drop existing connections.

Implements both iptables and nftables backends. The nftables OUTPUT
rules use a separate chain (ts-wan-bypass) with ChainTypeRoute to
trigger re-routing when the packet mark changes.

Also fixes a pre-existing byte-order bug in the nftables backend where
mark-related helper functions (getTailscaleFwmarkMask, etc.) used
hardcoded big-endian byte arrays instead of native byte order. On
little-endian systems (all x86), the nftables Bitwise expressions
operated on the wrong bits, making the connmark save/restore rules
(rp_filter workaround) silently ineffective. Changed all mark byte
helpers to use binary.NativeEndian.

Updates #10940

Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
2026-05-01 16:52:26 +00:00

206 lines
5.8 KiB
Go

// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
//go:build linux
// Package linuxfw returns the kind of firewall being used by the kernel.
package linuxfw
import (
"encoding/binary"
"errors"
"fmt"
"os"
"strconv"
"strings"
"github.com/tailscale/netlink"
"tailscale.com/feature"
"tailscale.com/tsconst"
"tailscale.com/types/logger"
)
// MatchDecision is the action the firewall takes on a packet that a rule
// has matched. addMatchSubnetRouteMarkRule uses it to choose between
// accepting and masquerading the packet.
type MatchDecision int

// The possible values of MatchDecision.
const (
	// Accept means the matched packet is accepted.
	Accept = MatchDecision(iota)
	// Masq means the matched packet is masqueraded.
	Masq
)
// FWModeNotSupportedError is the error reported when a firewall mode
// cannot be used. It records which mode was rejected and the underlying
// reason.
type FWModeNotSupportedError struct {
	// Mode is the firewall mode that is not supported.
	Mode FirewallMode
	// Err is the underlying cause; it is exposed through Unwrap.
	Err error
}

// Error implements the error interface, naming the rejected mode and the
// underlying cause.
func (e FWModeNotSupportedError) Error() string {
	msg := fmt.Sprintf("firewall mode %q not supported: %v", e.Mode, e.Err)
	return msg
}

// Is reports whether target is a FWModeNotSupportedError value. Only the
// type is compared, so errors.Is matches regardless of the Mode or Err
// carried by either error.
func (e FWModeNotSupportedError) Is(target error) bool {
	switch target.(type) {
	case FWModeNotSupportedError:
		return true
	default:
		return false
	}
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect the rest of the chain.
func (e FWModeNotSupportedError) Unwrap() error {
	cause := e.Err
	return cause
}
// FirewallMode identifies a firewall backend by name.
type FirewallMode string

// The firewall modes known to this package.
const (
	// FirewallModeIPTables identifies the iptables backend.
	FirewallModeIPTables = FirewallMode("iptables")
	// FirewallModeNfTables identifies the nftables backend.
	FirewallModeNfTables = FirewallMode("nftables")
)
// CGNATMode is a string-valued mode with two known values, "DROP" and
// "RETURN".
//
// NOTE(review): judging by the name and values, this presumably selects
// the firewall action applied to CGNAT-range traffic; confirm against the
// code that consumes it.
type CGNATMode string

// The CGNAT modes known to this package.
const (
	// CGNATModeDrop is the "DROP" mode.
	CGNATModeDrop = CGNATMode("DROP")
	// CGNATModeReturn is the "RETURN" mode.
	CGNATModeReturn = CGNATMode("RETURN")
)
// The following bits are added to packet marks for Tailscale use.
//
// We tried to pick bits sufficiently out of the way that it's
// unlikely to collide with existing uses. We have 4 bytes of mark
// bits to play with. We leave the lower byte alone on the assumption
// that sysadmins would use those. Kubernetes uses a few bits in the
// second byte, so we steer clear of that too.
//
// Empirically, most of the documentation on packet marks on the
// internet gives the impression that the marks are 16 bits
// wide. Based on this, we theorize that the upper two bytes are
// relatively unused in the wild, and so we consume bits 16:23 (the
// third byte).
//
// The constants are in the iptables/iproute2 string format for
// matching and setting the bits, so they can be directly embedded in
// commands.
//
// The *Num variants are the numeric forms. In this file they are used
// where an integer is required: as the mark of a netlink rule and as
// the input to the native-endian byte helpers below.
//
// NOTE(review): the values themselves live in package tsconst; confirm
// there that each string/number pair agrees.
const (
fwmarkMask = tsconst.LinuxFwmarkMask
fwmarkMaskNum = tsconst.LinuxFwmarkMaskNum
subnetRouteMark = tsconst.LinuxSubnetRouteMark
subnetRouteMarkNum = tsconst.LinuxSubnetRouteMarkNum
bypassMark = tsconst.LinuxBypassMark
bypassMarkNum = tsconst.LinuxBypassMarkNum
)
// getTailscaleFwmarkMaskNeg returns the negation of TailscaleFwmarkMask
// in native byte order.
func getTailscaleFwmarkMaskNeg() []byte {
return nativeEndianUint32(^uint32(fwmarkMaskNum))
}
// getTailscaleFwmarkMask returns the TailscaleFwmarkMask in native byte order.
func getTailscaleFwmarkMask() []byte {
return nativeEndianUint32(fwmarkMaskNum)
}
// getTailscaleSubnetRouteMark returns the TailscaleSubnetRouteMark
// in native byte order.
func getTailscaleSubnetRouteMark() []byte {
return nativeEndianUint32(subnetRouteMarkNum)
}
// getTailscaleBypassMark returns the TailscaleBypassMark in native byte order.
func getTailscaleBypassMark() []byte {
return nativeEndianUint32(bypassMarkNum)
}
// nativeEndianUint32 encodes v using the host's native byte order and
// returns the result as a freshly allocated 4-byte slice.
func nativeEndianUint32(v uint32) []byte {
	// Start from an empty slice with room for exactly one uint32 so the
	// append fills it without reallocating.
	return binary.NativeEndian.AppendUint32(make([]byte, 0, 4), v)
}
// checkIPv6ForTest can be set in tests.
// When it is non-nil, CheckIPv6 calls it with its logf argument and
// returns its result instead of inspecting the system.
var checkIPv6ForTest func(logger.Logf) error
// CheckIPv6 reports whether the system appears to have a usable IPv6
// network stack. It returns nil if so, and otherwise an error describing
// what looks wrong or missing. It does not verify that IPv6 is working
// right now or that a global address is configured, only that the host
// would support IPv6 if it were placed on an IPv6 network.
//
// If checkIPv6ForTest is set, it is called instead and its result is
// returned.
func CheckIPv6(logf logger.Logf) error {
	if testHook := checkIPv6ForTest; testHook != nil {
		return testHook(logf)
	}

	// parseKnob interprets the contents of a /proc knob file as a bool.
	parseKnob := func(contents []byte) (bool, error) {
		return strconv.ParseBool(strings.TrimSpace(string(contents)))
	}

	// Only a missing directory is reported here; any other Stat failure
	// falls through to the reads below.
	if _, statErr := os.Stat("/proc/sys/net/ipv6"); os.IsNotExist(statErr) {
		return statErr
	}

	ipv6Knob, err := os.ReadFile("/proc/sys/net/ipv6/conf/all/disable_ipv6")
	if err != nil {
		// Be conservative if we can't find the IPv6 configuration knob.
		return err
	}
	switch ipv6Off, parseErr := parseKnob(ipv6Knob); {
	case parseErr != nil:
		return errors.New("disable_ipv6 has invalid bool")
	case ipv6Off:
		return errors.New("disable_ipv6 is set")
	}

	// Older kernels don't support IPv6 policy routing. Some kernels
	// support policy routing but don't have this knob, so failing to
	// read the knob is not fatal.
	if policyKnob, readErr := os.ReadFile("/proc/sys/net/ipv6/conf/all/disable_policy"); readErr == nil {
		switch policyOff, parseErr := parseKnob(policyKnob); {
		case parseErr != nil:
			return errors.New("disable_policy has invalid bool")
		case policyOff:
			return errors.New("disable_policy is set")
		}
	}

	if ruleErr := CheckIPRuleSupportsV6(logf); ruleErr != nil {
		return fmt.Errorf("kernel doesn't support IPv6 policy routing: %w", ruleErr)
	}
	return nil
}
// CheckIPRuleSupportsV6 probes whether IPv6 policy routing rules can be
// used. It returns nil if IPv6 rules are already present or if a test
// rule can be added, and an error otherwise.
func CheckIPRuleSupportsV6(logf logger.Logf) error {
	// First try just a read-only operation to ideally avoid
	// having to modify any state.
	existing, err := netlink.RuleList(netlink.FAMILY_V6)
	if err != nil {
		return fmt.Errorf("querying IPv6 policy routing rules: %w", err)
	}
	if n := len(existing); n > 0 {
		logf("[v1] kernel supports IPv6 policy routing (found %d rules)", n)
		return nil
	}

	// No rules to observe, so try to actually create & delete one as a test.
	probe := netlink.NewRule()
	probe.Family = netlink.FAMILY_V6
	probe.Table = 52
	probe.Mark = bypassMarkNum
	probe.Priority = 1234

	// Delete the rule unconditionally before adding it, ignoring any
	// error: this only clears out a rule that might already be there.
	netlink.RuleDel(probe)
	// And clean up on exit, whether or not the add below succeeds.
	defer netlink.RuleDel(probe)

	return netlink.RuleAdd(probe)
}
// hookIPTablesCleanup is the hook consulted by IPTablesCleanUp: when its
// GetOk method reports a function, IPTablesCleanUp calls that function
// with its logf argument.
var hookIPTablesCleanup feature.Hook[func(logger.Logf)]
// IPTablesCleanUp removes all Tailscale added iptables rules.
// The work is delegated to the function held by hookIPTablesCleanup; if
// GetOk reports none, IPTablesCleanUp does nothing. logf is handed to
// that function so that any errors that occur are logged to it.
func IPTablesCleanUp(logf logger.Logf) {
	cleanup, ok := hookIPTablesCleanup.GetOk()
	if !ok {
		return
	}
	cleanup(logf)
}