From 35dc1fea725f3b947870ca9994bfbead26433eaf Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Mon, 26 Feb 2024 14:34:56 -0500 Subject: [PATCH] wgengine/netstack: add debug page for TCP forwarder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To help in debugging issues with subnet routers in userspace mode–for example, hitting the max inflight limit. Updates tailscale/corp#12184 Signed-off-by: Andrew Dunham Change-Id: Idd922f4ae37695f6598a914c2d050574755ef309 --- cmd/tailscaled/depaware.txt | 2 +- cmd/tailscaled/tailscaled.go | 2 + wgengine/netstack/debug.go | 129 ++++++++++++++++++++++++++++++++++ wgengine/netstack/netstack.go | 35 +++++++++ 4 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 wgengine/netstack/debug.go diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 7d25abd2a..614369469 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -508,7 +508,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de hash/fnv from tailscale.com/wgengine/magicsock hash/maphash from go4.org/mem html from html/template+ - html/template from github.com/gorilla/csrf + html/template from github.com/gorilla/csrf+ io from archive/tar+ io/fs from archive/tar+ io/ioutil from github.com/aws/aws-sdk-go-v2/aws/protocol/query+ diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go index 8c4365548..3ca77dbf8 100644 --- a/cmd/tailscaled/tailscaled.go +++ b/cmd/tailscaled/tailscaled.go @@ -755,6 +755,8 @@ func newNetstack(logf logger.Logf, sys *tsd.System) (*netstack.Impl, error) { // Only register debug info if we have a debug mux if debugMux != nil { expvar.Publish("netstack", ret.ExpVar()) + + debugMux.HandleFunc("/debug/netstack/tcp-forwarder", ret.DebugTCPForwarder) } return ret, nil } diff --git a/wgengine/netstack/debug.go b/wgengine/netstack/debug.go new file mode 100644 index 000000000..1259f63a9 --- /dev/null +++ b/wgengine/netstack/debug.go @@ -0,0 +1,129 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package netstack + +import ( + "html/template" + "net/http" + "net/netip" + "slices" + "time" + + xmaps "golang.org/x/exp/maps" + "gvisor.dev/gvisor/pkg/tcpip" +) + +var tcpForwarderTemplate = template.Must(template.New("").Parse(` + + + + + + +

TCP Forwarder

+ +

TCP Statistics

+ + + + + + {{ range .Stats }} + + {{ end }} +
MetricValue
{{ .Key }}{{ .Value }}
+ +

In-Flight Outbound Connections

+ + + + + + + {{ range .InFlightDials }} + + + + + + {{ end }} +
Start TimeClient IPRemote IP
{{ .Start.Format "2006-01-02T15:04:05Z07:00" }} ({{ printf "%.2f" .DurationSecs }} seconds ago){{ .ClientIP }}{{ .RemoteAddr }}
+ + +`)) + +// DebugTCPForwarder writes debug information about this netstack +// implementation's current TCP forwarder in HTML format. +func (ns *Impl) DebugTCPForwarder(w http.ResponseWriter, r *http.Request) { + // Grab data while holding the mutex + ns.tcpDebugMu.Lock() + tcpDials := xmaps.Values(ns.inFlightDials) + ns.tcpDebugMu.Unlock() + + slices.SortFunc(tcpDials, func(a, b tcpDialInfo) int { + return a.start.Compare(b.start) + }) + + type templateDataStats struct { + Key string + Value uint64 + } + type templateDataDial struct { + Start time.Time + DurationSecs float64 + ClientIP netip.Addr + RemoteAddr netip.AddrPort + } + type templateData struct { + Stats []templateDataStats + InFlightDials []templateDataDial + } + + var data templateData + + // Statistics from gVisor + tcpStats := ns.ipstack.Stats().TCP + tcpMetrics := []struct { + name string + field *tcpip.StatCounter + }{ + {"Active Connection Openings", tcpStats.ActiveConnectionOpenings}, + {"Passive Connection Openings", tcpStats.PassiveConnectionOpenings}, + {"Established Connections", tcpStats.CurrentEstablished}, + {"Connected Connections", tcpStats.CurrentConnected}, + {"Dropped In-Flight Forwarder Connections", tcpStats.ForwardMaxInFlightDrop}, + {"Established Resets", tcpStats.EstablishedResets}, + {"Established Timeout", tcpStats.EstablishedTimedout}, + {"Failed Connection Attempts", tcpStats.FailedConnectionAttempts}, + {"Retransmits", tcpStats.Retransmits}, + {"Timeouts", tcpStats.Timeouts}, + {"Checksum Errors", tcpStats.ChecksumErrors}, + {"Failed Port Reservations", tcpStats.FailedPortReservations}, + } + for _, metric := range tcpMetrics { + data.Stats = append(data.Stats, templateDataStats{ + Key: metric.name, + Value: metric.field.Value(), + }) + } + + // Any in-flight DialContext calls in the TCP forwarding path. + now := time.Now() + for _, dial := range tcpDials { + elapsed := now.Sub(dial.start) + data.InFlightDials = append(data.InFlightDials, templateDataDial{ + Start: dial.start, + DurationSecs: elapsed.Seconds(), + ClientIP: dial.clientRemoteIP, + RemoteAddr: dial.dialAddr, + }) + } + + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.Header().Set("Cache-Control", "no-cache") + tcpForwarderTemplate.Execute(w, &data) +} diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go index ae2513b33..ccb590fff 100644 --- a/wgengine/netstack/netstack.go +++ b/wgengine/netstack/netstack.go @@ -150,6 +150,18 @@ type Impl struct { // TCP connections, so they can be unregistered when connections are // closed. connsOpenBySubnetIP map[netip.Addr]int + + // Debug information for the TCP forwarding code; all fields protected + // by tcpDebugMu. + tcpDebugMu sync.Mutex + inFlightDialCtr int + inFlightDials map[int]tcpDialInfo // keyed by a random integer +} + +type tcpDialInfo struct { + clientRemoteIP netip.Addr + dialAddr netip.AddrPort + start time.Time } const nicID = 1 @@ -242,6 +254,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi connsOpenBySubnetIP: make(map[netip.Addr]int), dns: dns, tailFSForLocal: tailFSForLocal, + inFlightDials: make(map[int]tcpDialInfo), } ns.ctx, ns.ctxCancel = context.WithCancel(context.Background()) ns.atomicIsLocalIPFunc.Store(tsaddr.FalseContainsIPFunc()) @@ -981,6 +994,24 @@ func (ns *Impl) acceptTCP(r *tcp.ForwarderRequest) { } } +func (ns *Impl) debugForwardedTCP(clientRemoteIP netip.Addr, remote netip.AddrPort) func() { + ns.tcpDebugMu.Lock() + debugKey := ns.inFlightDialCtr + ns.inFlightDialCtr++ + ns.inFlightDials[debugKey] = tcpDialInfo{ + clientRemoteIP: clientRemoteIP, + dialAddr: remote, + start: time.Now(), + } + ns.tcpDebugMu.Unlock() + + return func() { + ns.tcpDebugMu.Lock() + delete(ns.inFlightDials, debugKey) + ns.tcpDebugMu.Unlock() + } +} + func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet.TCPConn, clientRemoteIP netip.Addr, wq *waiter.Queue, dialAddr netip.AddrPort) (handled bool) { dialAddrStr := dialAddr.String() if debugNetstack() { @@ -1008,9 +1039,13 @@ func (ns *Impl) forwardTCP(getClient func(...tcpip.SettableSocketOption) *gonet. cancel() }() + // Insert debug info, and remove it once we've dialed our outbound conn. + debugDialDone := ns.debugForwardedTCP(clientRemoteIP, dialAddr) + // Attempt to dial the outbound connection before we accept the inbound one. var stdDialer net.Dialer server, err := stdDialer.DialContext(ctx, "tcp", dialAddrStr) + debugDialDone() if err != nil { ns.logf("netstack: could not connect to local server at %s: %v", dialAddr.String(), err) return