mirror of
https://github.com/tailscale/tailscale.git
synced 2026-05-04 19:56:35 +02:00
wgengine/netstack: absorb all quad-100 traffic locally, never leak to peers
Previously, handleLocalPackets intercepted traffic to the Tailscale
service IP (100.100.100.100 / fd7a:115c:a1e0::53) only for an allow-list
of ports: TCP 53/80/8080 and UDP 53. Any other port returned
filter.Accept, letting the packet fall through to the ACL filter and
wireguard-go, which would attempt a peer lookup. No peer owns the
quad-100 AllowedIP, so after ~5s pendopen.go would log:
open-conn-track: timeout opening ...; no associated peer node
This is the common "conntrack error no peer found for 100.100.100.100:853"
log spam seen in the wild (e.g. from systemd-resolved or another
resolver speculatively trying DoT on quad-100). It also leaks quad-100
packets onto the tailnet.
Remove the port allow-list so handleLocalPackets absorbs every quad-100
packet into netstack regardless of IP protocol or port. Traffic never
reaches the conntrack / peer-routing layers.
With the allow-list gone, acceptTCP needs a corresponding guard: on a
quad-100 TCP port we don't serve, execution used to fall through to the
isTailscaleIP case (quad-100 is in the tailscale IP range), which
rewrote the dial target to 127.0.0.1:<port> and forwardTCP'd the
connection to whatever happened to be listening on the host's loopback
at that port. Add a hittingServiceIP case that RSTs cleanly instead,
placed before the isTailscaleIP fallthrough.
TestQuad100UnservedTCPPortDoesNotForward is a new integration test that
injects a TCP SYN to 100.100.100.100:853 via handleLocalPackets, stubs
forwardDialFunc, and asserts the dialer is not invoked; it catches
regressions of the acceptTCP recursion/loopback-redirection case.
Fixes #15796
Fixes #19421
Updates #3261
Updates #11305
Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
parent
006d7e180e
commit
1b40911611
@ -845,20 +845,27 @@ func (ns *Impl) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper, gro *gro.
|
||||
serviceName, isVIPServiceIP := ns.atomicIPVIPServiceMap.Load()[dst]
|
||||
switch {
|
||||
case dst == serviceIP || dst == serviceIPv6:
|
||||
// We want to intercept some traffic to the "service IP" (e.g.
|
||||
// 100.100.100.100 for IPv4). However, of traffic to the
|
||||
// service IP, we only care about UDP 53, and TCP on port 53,
|
||||
// 80, and 8080.
|
||||
switch p.IPProto {
|
||||
case ipproto.TCP:
|
||||
if port := p.Dst.Port(); port != 53 && port != 80 && port != 8080 && !ns.isLoopbackPort(port) {
|
||||
return filter.Accept, gro
|
||||
}
|
||||
case ipproto.UDP:
|
||||
if port := p.Dst.Port(); port != 53 && !ns.isLoopbackPort(port) {
|
||||
return filter.Accept, gro
|
||||
}
|
||||
}
|
||||
// Traffic to the Tailscale service IP (100.100.100.100 /
|
||||
// fd7a:115c:a1e0::53) is always terminated locally on this
|
||||
// node; it must never be forwarded out over WireGuard to a
|
||||
// peer. Netstack's TCP/UDP acceptors handle the ports we
|
||||
// actually serve (UDP 53 MagicDNS, TCP 53/80/8080 for DNS,
|
||||
// the web client, and Taildrive, plus any debug loopback
|
||||
// port). Other ports are rejected cleanly by netstack: UDP
|
||||
// closes the endpoint in acceptUDP, and TCP is RST'd by
|
||||
// acceptTCP's hittingServiceIP guard.
|
||||
//
|
||||
// Previously we returned filter.Accept for TCP/UDP on any
|
||||
// other port, which let the packet fall through to the ACL
|
||||
// filter and ultimately wireguard-go, where no peer owns the
|
||||
// quad-100 AllowedIP. That produced noisy "open-conn-track:
|
||||
// timeout opening ...; no associated peer node" log lines
|
||||
// (e.g. for stray traffic to 100.100.100.100:853 / DoT) and
|
||||
// leaked quad-100 packets onto the tailnet.
|
||||
//
|
||||
// We now unconditionally absorb quad-100 into netstack here,
|
||||
// regardless of IP protocol or port, so such traffic never
|
||||
// reaches the conntrack / peer-routing layers.
|
||||
case isVIPServiceIP:
|
||||
// returns all active VIP services in a set, since the IPVIPServiceMap
|
||||
// contains inactive service IPs when node hosts the service, we need to
|
||||
@ -1654,6 +1661,24 @@ func (ns *Impl) acceptTCP(r *tcp.ForwarderRequest) {
|
||||
} else {
|
||||
dialIP = ipv4Loopback
|
||||
}
|
||||
case hittingServiceIP:
|
||||
// TCP to the Tailscale service IP on a port we don't serve
|
||||
// (anything other than DNS/53, web client/80, Taildrive/8080,
|
||||
// or the debug loopback port handled above). handleLocalPackets
|
||||
// absorbs all quad-100 traffic into netstack to prevent it
|
||||
// from leaking to WireGuard peers as noisy "open-conn-track:
|
||||
// timeout opening ...; no associated peer node" log lines
|
||||
// (see the comment there).
|
||||
//
|
||||
// Without this explicit guard, execution would fall through
|
||||
// to the isTailscaleIP case below (quad-100 is in the
|
||||
// tailscale IP range), rewriting the dial target to
|
||||
// 127.0.0.1:<port> and forwardTCP'ing the connection onto
|
||||
// whatever random service happens to be listening on the
|
||||
// host's loopback at that port. Reject cleanly with a RST
|
||||
// here instead.
|
||||
r.Complete(true) // sends a RST
|
||||
return
|
||||
case isTailscaleIP:
|
||||
dialIP = ipv4Loopback
|
||||
}
|
||||
|
||||
@ -953,6 +953,7 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
impl.lb.SetIPServiceMappingsForTest(IPServiceMap)
|
||||
|
||||
t.Run("ShouldHandleServiceIP", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: 4,
|
||||
IPProto: ipproto.TCP,
|
||||
@ -965,7 +966,94 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
t.Errorf("got filter outcome %v, want filter.DropSilently", resp)
|
||||
}
|
||||
})
|
||||
// Any port on the quad-100 service IP must be absorbed locally by
|
||||
// netstack and never leak out to the WireGuard / peer-routing
|
||||
// layers. Historically we only intercepted specific ports (UDP 53
|
||||
// and TCP 53/80/8080), causing stray traffic to other ports such
|
||||
// as 100.100.100.100:853 (DoT) to time out in wireguard-go and
|
||||
// produce "open-conn-track: timeout opening ...; no associated
|
||||
// peer node" log spam. See the handleLocalPackets comment.
|
||||
quad100LeakCases := []struct {
|
||||
name string
|
||||
proto ipproto.Proto
|
||||
dst string
|
||||
}{
|
||||
{"TCP-853-DoT-v4", ipproto.TCP, "100.100.100.100:853"},
|
||||
{"TCP-443-DoH-v4", ipproto.TCP, "100.100.100.100:443"},
|
||||
{"TCP-9000-stray-v4", ipproto.TCP, "100.100.100.100:9000"},
|
||||
{"UDP-853-DoQ-v4", ipproto.UDP, "100.100.100.100:853"},
|
||||
{"UDP-443-v4", ipproto.UDP, "100.100.100.100:443"},
|
||||
{"TCP-853-DoT-v6", ipproto.TCP, "[fd7a:115c:a1e0::53]:853"},
|
||||
{"UDP-443-v6", ipproto.UDP, "[fd7a:115c:a1e0::53]:443"},
|
||||
}
|
||||
for _, tc := range quad100LeakCases {
|
||||
t.Run("ShouldNotLeakQuad100_"+tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
dst := netip.MustParseAddrPort(tc.dst)
|
||||
ipVersion := uint8(4)
|
||||
if dst.Addr().Is6() {
|
||||
ipVersion = 6
|
||||
}
|
||||
src := "127.0.0.1:9999"
|
||||
if ipVersion == 6 {
|
||||
src = "[::1]:9999"
|
||||
}
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: ipVersion,
|
||||
IPProto: tc.proto,
|
||||
Src: netip.MustParseAddrPort(src),
|
||||
Dst: dst,
|
||||
}
|
||||
if tc.proto == ipproto.TCP {
|
||||
pkt.TCPFlags = packet.TCPSyn
|
||||
}
|
||||
resp, _ := impl.handleLocalPackets(pkt, impl.tundev, nil)
|
||||
if resp != filter.DropSilently {
|
||||
t.Errorf("quad-100 %s packet leaked: got filter outcome %v, want filter.DropSilently", tc.name, resp)
|
||||
}
|
||||
})
|
||||
}
|
||||
// Exhaustive sweep of all ports for both transport protocols and
|
||||
// both IP versions, confirming no port leaks. The quad-100 branch
|
||||
// of handleLocalPackets is port-independent by construction; this
|
||||
// test serves as a regression guard against accidental port-based
|
||||
// exemptions slipping back in.
|
||||
t.Run("ShouldNotLeakQuad100_AllPorts", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
protos := []ipproto.Proto{ipproto.TCP, ipproto.UDP}
|
||||
dsts := []netip.Addr{
|
||||
netip.MustParseAddr("100.100.100.100"),
|
||||
netip.MustParseAddr("fd7a:115c:a1e0::53"),
|
||||
}
|
||||
for _, proto := range protos {
|
||||
for _, dstAddr := range dsts {
|
||||
ipVersion := uint8(4)
|
||||
srcStr := "127.0.0.1:9999"
|
||||
if dstAddr.Is6() {
|
||||
ipVersion = 6
|
||||
srcStr = "[::1]:9999"
|
||||
}
|
||||
src := netip.MustParseAddrPort(srcStr)
|
||||
for port := 1; port <= 65535; port++ {
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: ipVersion,
|
||||
IPProto: proto,
|
||||
Src: src,
|
||||
Dst: netip.AddrPortFrom(dstAddr, uint16(port)),
|
||||
}
|
||||
if proto == ipproto.TCP {
|
||||
pkt.TCPFlags = packet.TCPSyn
|
||||
}
|
||||
resp, _ := impl.handleLocalPackets(pkt, impl.tundev, nil)
|
||||
if resp != filter.DropSilently {
|
||||
t.Fatalf("port=%d proto=%v dst=%v: got %v, want filter.DropSilently", port, proto, dstAddr, resp)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("ShouldHandle4via6", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: 6,
|
||||
IPProto: ipproto.TCP,
|
||||
@ -988,6 +1076,7 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("ShouldHandleLocalTailscaleServices", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: 4,
|
||||
IPProto: ipproto.TCP,
|
||||
@ -1001,6 +1090,7 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("OtherNonHandled", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
pkt := &packet.Parsed{
|
||||
IPVersion: 6,
|
||||
IPProto: ipproto.TCP,
|
||||
@ -1023,6 +1113,100 @@ func TestHandleLocalPackets(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// TestQuad100UnservedTCPPortDoesNotForward verifies that a TCP SYN to the
|
||||
// Tailscale service IP (100.100.100.100) on a port we don't serve is
|
||||
// absorbed by netstack and rejected cleanly, without triggering the
|
||||
// outbound forwardTCP dialer.
|
||||
//
|
||||
// handleLocalPackets now absorbs all quad-100 traffic regardless of
|
||||
// port to prevent it leaking to WireGuard peers (which produced noisy
|
||||
// "open-conn-track: timeout opening ...; no associated peer node" log
|
||||
// lines). That leaves acceptTCP responsible for rejecting connections
|
||||
// to ports we don't handle; without an explicit guard, execution would
|
||||
// fall through to the isTailscaleIP case (quad-100 is in the tailscale
|
||||
// range), rewriting the dial target to 127.0.0.1:<port> and forwarding
|
||||
// the connection to whatever random service happened to be listening
|
||||
// on the host's loopback at that port.
|
||||
//
|
||||
// This test asserts that the forward dialer is NOT invoked for quad-100
|
||||
// SYNs on unserved ports; the guard in acceptTCP must RST instead.
|
||||
func TestQuad100UnservedTCPPortDoesNotForward(t *testing.T) {
|
||||
impl := makeNetstack(t, func(impl *Impl) {
|
||||
impl.ProcessSubnets = false
|
||||
impl.ProcessLocalIPs = false
|
||||
impl.atomicIsLocalIPFunc.Store(looksLikeATailscaleSelfAddress)
|
||||
})
|
||||
|
||||
dialFn, gotConn := makeHangDialer(t)
|
||||
impl.forwardDialFunc = dialFn
|
||||
|
||||
// Use a client IP in the CGNAT range so shouldProcessInbound-adjacent
|
||||
// code paths treat this as plausibly-peer-sourced traffic, matching
|
||||
// what a real stray quad-100 probe from the host OS would look like.
|
||||
client := netip.MustParseAddr("100.101.102.103")
|
||||
quad100 := tsaddr.TailscaleServiceIP()
|
||||
|
||||
// 853 is DoT, the specific case called out in the original bug
|
||||
// report ("conntrack error no peer found for 100.100.100.100:853").
|
||||
// Before the fix, port 853 (and any non-{53,80,8080} port) leaked
|
||||
// out to WireGuard; after the fix it is absorbed here and must NOT
|
||||
// trigger forwardTCP.
|
||||
pkt := tcp4syn(t, client, quad100, 1234, 853)
|
||||
var parsed packet.Parsed
|
||||
parsed.Decode(pkt)
|
||||
|
||||
resp, _ := impl.handleLocalPackets(&parsed, impl.tundev, nil)
|
||||
if resp != filter.DropSilently {
|
||||
t.Fatalf("handleLocalPackets for quad-100:853: got %v, want filter.DropSilently", resp)
|
||||
}
|
||||
|
||||
// acceptTCP runs asynchronously in the gVisor TCP dispatcher after
|
||||
// handleLocalPackets injects the packet into netstack. Use the
|
||||
// in-flight connection counter as a deterministic synchronization
|
||||
// point: wrapTCPProtocolHandler increments connsInFlightByClient
|
||||
// when the dispatcher hands the connection off to acceptTCP, and
|
||||
// acceptTCP's deferred decrementInFlightTCPForward decrements it
|
||||
// on return.
|
||||
//
|
||||
// On the green path (RST guard fires), acceptTCP returns promptly
|
||||
// and the counter reaches 0. On the red path (fall-through to
|
||||
// forwardTCP), acceptTCP blocks inside the forwardDialFunc call —
|
||||
// makeHangDialer signals gotConn on entry (buffered, non-blocking)
|
||||
// and then blocks forever — so the counter never reaches 0 but
|
||||
// gotConn fires synchronously from the dispatcher goroutine. A
|
||||
// select on both races those outcomes without real-time padding.
|
||||
//
|
||||
// testing/synctest is not usable here: gVisor's sleep package calls
|
||||
// the runtime's gopark directly rather than via the standard
|
||||
// library, so synctest.Wait() cannot observe those goroutines
|
||||
// becoming durably blocked and hangs indefinitely.
|
||||
inFlightZero := make(chan struct{})
|
||||
go func() {
|
||||
for {
|
||||
impl.mu.Lock()
|
||||
n := impl.connsInFlightByClient[client]
|
||||
impl.mu.Unlock()
|
||||
if n == 0 {
|
||||
close(inFlightZero)
|
||||
return
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-gotConn:
|
||||
t.Fatalf("forwardDialFunc was called for quad-100:853; acceptTCP fell through to forwardTCP instead of sending RST. This means stray traffic to quad-100 on unserved ports is being redirected to the host's loopback at the same port.")
|
||||
case <-inFlightZero:
|
||||
// acceptTCP returned cleanly; the RST guard fired.
|
||||
case <-time.After(5 * time.Second):
|
||||
// Safety net so a regression in the in-flight counter plumbing
|
||||
// doesn't hang the whole test run; both outcomes above should
|
||||
// fire within milliseconds in practice.
|
||||
t.Fatal("timed out waiting for acceptTCP to dispatch quad-100:853 SYN")
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldSendToHost(t *testing.T) {
|
||||
var (
|
||||
selfIP4 = netip.MustParseAddr("100.64.1.2")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user