diff --git a/client/local/local.go b/client/local/local.go index a606fbdf3..1be1f2ca7 100644 --- a/client/local/local.go +++ b/client/local/local.go @@ -33,6 +33,7 @@ import ( "tailscale.com/ipn" "tailscale.com/ipn/ipnstate" "tailscale.com/net/netutil" + "tailscale.com/net/udprelay/status" "tailscale.com/paths" "tailscale.com/safesocket" "tailscale.com/tailcfg" @@ -1184,6 +1185,16 @@ func (lc *Client) DebugSetExpireIn(ctx context.Context, d time.Duration) error { return err } +// DebugPeerRelaySessions returns debug information about the current peer +// relay sessions running through this node. +func (lc *Client) DebugPeerRelaySessions(ctx context.Context) (*status.ServerStatus, error) { + body, err := lc.send(ctx, "GET", "/localapi/v0/debug-peer-relay-sessions", 200, nil) + if err != nil { + return nil, fmt.Errorf("error %w: %s", err, body) + } + return decodeJSON[*status.ServerStatus](body) +} + // StreamDebugCapture streams a pcap-formatted packet capture. // // The provided context does not determine the lifetime of the diff --git a/cmd/derper/depaware.txt b/cmd/derper/depaware.txt index 61e42ede1..b0501b588 100644 --- a/cmd/derper/depaware.txt +++ b/cmd/derper/depaware.txt @@ -122,6 +122,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa tailscale.com/net/tlsdial/blockblame from tailscale.com/net/tlsdial tailscale.com/net/tsaddr from tailscale.com/ipn+ 💣 tailscale.com/net/tshttpproxy from tailscale.com/derp/derphttp+ + tailscale.com/net/udprelay/status from tailscale.com/client/local tailscale.com/net/wsconn from tailscale.com/cmd/derper tailscale.com/paths from tailscale.com/client/local 💣 tailscale.com/safesocket from tailscale.com/client/local diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index 442a96611..e0fdc27bb 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -883,6 +883,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ 💣 tailscale.com/net/tshttpproxy from tailscale.com/clientupdate/distsign+ tailscale.com/net/tstun from tailscale.com/tsd+ tailscale.com/net/udprelay/endpoint from tailscale.com/wgengine/magicsock + tailscale.com/net/udprelay/status from tailscale.com/client/local tailscale.com/omit from tailscale.com/ipn/conffile tailscale.com/paths from tailscale.com/client/local+ 💣 tailscale.com/portlist from tailscale.com/ipn/ipnlocal diff --git a/cmd/tailscale/cli/debug-peer-relay.go b/cmd/tailscale/cli/debug-peer-relay.go new file mode 100644 index 000000000..bef8b8369 --- /dev/null +++ b/cmd/tailscale/cli/debug-peer-relay.go @@ -0,0 +1,77 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !ios && !ts_omit_relayserver + +package cli + +import ( + "bytes" + "cmp" + "context" + "fmt" + "net/netip" + "slices" + + "github.com/peterbourgon/ff/v3/ffcli" + "tailscale.com/net/udprelay/status" +) + +func init() { + debugPeerRelayCmd = mkDebugPeerRelaySessionsCmd +} + +func mkDebugPeerRelaySessionsCmd() *ffcli.Command { + return &ffcli.Command{ + Name: "peer-relay-sessions", + ShortUsage: "tailscale debug peer-relay-sessions", + Exec: runPeerRelaySessions, + ShortHelp: "Print the current set of active peer relay sessions relayed through this node", + } +} + +func runPeerRelaySessions(ctx context.Context, args []string) error { + srv, err := localClient.DebugPeerRelaySessions(ctx) + if err != nil { + return err + } + + var buf bytes.Buffer + f := func(format string, a ...any) { fmt.Fprintf(&buf, format, a...) } + + f("Server port: ") + if srv.UDPPort == nil { + f("not configured (you can configure the port with 'tailscale set --relay-server-port=')") + } else { + f("%d", *srv.UDPPort) + } + f("\n") + f("Sessions count: %d\n", len(srv.Sessions)) + if len(srv.Sessions) == 0 { + Stdout.Write(buf.Bytes()) + return nil + } + + fmtSessionDirection := func(a, z status.ClientInfo) string { + fmtEndpoint := func(ap netip.AddrPort) string { + if ap.IsValid() { + return ap.String() + } + return "" + } + return fmt.Sprintf("%s(%s) --> %s(%s), Packets: %d Bytes: %d", + fmtEndpoint(a.Endpoint), a.ShortDisco, + fmtEndpoint(z.Endpoint), z.ShortDisco, + a.PacketsTx, a.BytesTx) + } + + f("\n") + slices.SortFunc(srv.Sessions, func(s1, s2 status.ServerSession) int { return cmp.Compare(s1.VNI, s2.VNI) }) + for _, s := range srv.Sessions { + f("VNI: %d\n", s.VNI) + f(" %s\n", fmtSessionDirection(s.Client1, s.Client2)) + f(" %s\n", fmtSessionDirection(s.Client2, s.Client1)) + } + Stdout.Write(buf.Bytes()) + return nil +} diff --git a/cmd/tailscale/cli/debug.go b/cmd/tailscale/cli/debug.go index b3170d000..c8a0d57c1 100644 --- a/cmd/tailscale/cli/debug.go +++ b/cmd/tailscale/cli/debug.go @@ -49,8 +49,9 @@ import ( ) var ( - debugCaptureCmd func() *ffcli.Command // or nil - debugPortmapCmd func() *ffcli.Command // or nil + debugCaptureCmd func() *ffcli.Command // or nil + debugPortmapCmd func() *ffcli.Command // or nil + debugPeerRelayCmd func() *ffcli.Command // or nil ) func debugCmd() *ffcli.Command { @@ -374,6 +375,7 @@ func debugCmd() *ffcli.Command { return fs })(), }, + ccall(debugPeerRelayCmd), }...), } } diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index b9b7db525..deeb9c3a3 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -143,6 +143,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/net/tlsdial/blockblame from tailscale.com/net/tlsdial tailscale.com/net/tsaddr from tailscale.com/client/web+ 💣 tailscale.com/net/tshttpproxy from tailscale.com/clientupdate/distsign+ + tailscale.com/net/udprelay/status from tailscale.com/client/local+ tailscale.com/paths from tailscale.com/client/local+ 💣 tailscale.com/safesocket from tailscale.com/client/local+ tailscale.com/syncs from tailscale.com/control/controlhttp+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 22f80d5d7..f85063ddb 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -358,6 +358,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/net/tstun from tailscale.com/cmd/tailscaled+ tailscale.com/net/udprelay from tailscale.com/feature/relayserver tailscale.com/net/udprelay/endpoint from tailscale.com/feature/relayserver+ + tailscale.com/net/udprelay/status from tailscale.com/client/local+ tailscale.com/omit from tailscale.com/ipn/conffile tailscale.com/paths from tailscale.com/client/local+ 💣 tailscale.com/portlist from tailscale.com/ipn/ipnlocal diff --git a/cmd/tsidp/depaware.txt b/cmd/tsidp/depaware.txt index d92a0b41a..f6bab6978 100644 --- a/cmd/tsidp/depaware.txt +++ b/cmd/tsidp/depaware.txt @@ -314,6 +314,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar 💣 tailscale.com/net/tshttpproxy from tailscale.com/clientupdate/distsign+ tailscale.com/net/tstun from tailscale.com/tsd+ tailscale.com/net/udprelay/endpoint from tailscale.com/wgengine/magicsock + tailscale.com/net/udprelay/status from tailscale.com/client/local tailscale.com/omit from tailscale.com/ipn/conffile tailscale.com/paths from tailscale.com/client/local+ 💣 tailscale.com/portlist from tailscale.com/ipn/ipnlocal diff --git a/feature/relayserver/relayserver.go b/feature/relayserver/relayserver.go index d77d7145a..91d07484c 100644 --- a/feature/relayserver/relayserver.go +++ b/feature/relayserver/relayserver.go @@ -6,7 +6,10 @@ package relayserver import ( + "encoding/json" + "fmt" "log" + "net/http" "net/netip" "strings" "sync" @@ -16,8 +19,10 @@ import ( "tailscale.com/feature" "tailscale.com/ipn" "tailscale.com/ipn/ipnext" + "tailscale.com/ipn/localapi" "tailscale.com/net/udprelay" "tailscale.com/net/udprelay/endpoint" + "tailscale.com/net/udprelay/status" "tailscale.com/tailcfg" "tailscale.com/types/key" "tailscale.com/types/logger" @@ -33,6 +38,32 @@ const featureName = "relayserver" func init() { feature.Register(featureName) ipnext.RegisterExtension(featureName, newExtension) + localapi.Register("debug-peer-relay-sessions", servePeerRelayDebugSessions) +} + +// servePeerRelayDebugSessions is an HTTP handler for the Local API that +// returns debug/status information for peer relay sessions being relayed by +// this Tailscale node. It writes a JSON-encoded [status.ServerStatus] into the +// HTTP response, or returns an HTTP 405/500 with error text as the body. +func servePeerRelayDebugSessions(h *localapi.Handler, w http.ResponseWriter, r *http.Request) { + if r.Method != "GET" { + http.Error(w, "GET required", http.StatusMethodNotAllowed) + return + } + + var e *extension + if ok := h.LocalBackend().FindMatchingExtension(&e); !ok { + http.Error(w, "peer relay server extension unavailable", http.StatusInternalServerError) + return + } + + st := e.serverStatus() + j, err := json.Marshal(st) + if err != nil { + http.Error(w, fmt.Sprintf("failed to marshal json: %v", err), http.StatusInternalServerError) + return + } + w.Write(j) } // newExtension is an [ipnext.NewExtensionFn] that creates a new relay server @@ -53,16 +84,18 @@ type extension struct { mu sync.Mutex // guards the following fields shutdown bool - port *int // ipn.Prefs.RelayServerPort, nil if disabled - disconnectFromBusCh chan struct{} // non-nil if consumeEventbusTopics is running, closed to signal it to return - busDoneCh chan struct{} // non-nil if consumeEventbusTopics is running, closed when it returns - hasNodeAttrDisableRelayServer bool // tailcfg.NodeAttrDisableRelayServer + port *int // ipn.Prefs.RelayServerPort, nil if disabled + disconnectFromBusCh chan struct{} // non-nil if consumeEventbusTopics is running, closed to signal it to return + busDoneCh chan struct{} // non-nil if consumeEventbusTopics is running, closed when it returns + debugSessionsCh chan chan []status.ServerSession // non-nil if consumeEventbusTopics is running + hasNodeAttrDisableRelayServer bool // tailcfg.NodeAttrDisableRelayServer } // relayServer is the interface of [udprelay.Server]. type relayServer interface { AllocateEndpoint(discoA key.DiscoPublic, discoB key.DiscoPublic) (endpoint.ServerEndpoint, error) Close() error + GetSessions() []status.ServerSession } // Name implements [ipnext.Extension]. @@ -93,6 +126,7 @@ func (e *extension) handleBusLifetimeLocked() { port := *e.port e.disconnectFromBusCh = make(chan struct{}) e.busDoneCh = make(chan struct{}) + e.debugSessionsCh = make(chan chan []status.ServerSession) go e.consumeEventbusTopics(port) } @@ -139,6 +173,11 @@ var overrideAddrs = sync.OnceValue(func() (ret []netip.Addr) { return }) +// consumeEventbusTopics serves endpoint allocation requests over the eventbus. +// It also serves [relayServer] debug information on a channel. +// consumeEventbusTopics must never acquire [extension.mu], which can be held by +// other goroutines while waiting to receive on [extension.busDoneCh] or the +// inner [extension.debugSessionsCh] channel. func (e *extension) consumeEventbusTopics(port int) { defer close(e.busDoneCh) @@ -159,6 +198,14 @@ func (e *extension) consumeEventbusTopics(port int) { return case <-eventClient.Done(): return + case respCh := <-e.debugSessionsCh: + if rs == nil { + // Don't initialize the server simply for a debug request. + respCh <- nil + continue + } + sessions := rs.GetSessions() + respCh <- sessions case req := <-reqSub.Events(): if rs == nil { var err error @@ -199,6 +246,7 @@ func (e *extension) disconnectFromBusLocked() { <-e.busDoneCh e.busDoneCh = nil e.disconnectFromBusCh = nil + e.debugSessionsCh = nil } } @@ -210,3 +258,30 @@ func (e *extension) Shutdown() error { e.shutdown = true return nil } + +// serverStatus gathers and returns current peer relay server status information +// for this Tailscale node, and status of each peer relay session this node is +// relaying (if any). +func (e *extension) serverStatus() status.ServerStatus { + e.mu.Lock() + defer e.mu.Unlock() + + st := status.ServerStatus{ + UDPPort: nil, + Sessions: nil, + } + if e.port == nil || e.busDoneCh == nil { + return st + } + st.UDPPort = ptr.To(*e.port) + + ch := make(chan []status.ServerSession) + select { + case e.debugSessionsCh <- ch: + resp := <-ch + st.Sessions = resp + return st + case <-e.busDoneCh: + return st + } +} diff --git a/net/udprelay/server.go b/net/udprelay/server.go index 123813c16..424c7a617 100644 --- a/net/udprelay/server.go +++ b/net/udprelay/server.go @@ -31,6 +31,7 @@ import ( "tailscale.com/net/sockopts" "tailscale.com/net/stun" "tailscale.com/net/udprelay/endpoint" + "tailscale.com/net/udprelay/status" "tailscale.com/tstime" "tailscale.com/types/key" "tailscale.com/types/logger" @@ -95,6 +96,8 @@ type serverEndpoint struct { boundAddrPorts [2]netip.AddrPort // or zero value if a handshake has never completed for that relay leg lastSeen [2]time.Time // TODO(jwhited): consider using mono.Time challenge [2][disco.BindUDPRelayChallengeLen]byte + packetsRx [2]uint64 // num packets received from/sent by each client after they are bound + bytesRx [2]uint64 // num bytes received from/sent by each client after they are bound lamportID uint64 vni uint32 @@ -223,9 +226,13 @@ func (e *serverEndpoint) handlePacket(from netip.AddrPort, gh packet.GeneveHeade switch { case from == e.boundAddrPorts[0]: e.lastSeen[0] = time.Now() + e.packetsRx[0]++ + e.bytesRx[0] += uint64(len(b)) return b, e.boundAddrPorts[1] case from == e.boundAddrPorts[1]: e.lastSeen[1] = time.Now() + e.packetsRx[1]++ + e.bytesRx[1] += uint64(len(b)) return b, e.boundAddrPorts[0] default: // unrecognized source @@ -782,3 +789,41 @@ func (s *Server) AllocateEndpoint(discoA, discoB key.DiscoPublic) (endpoint.Serv SteadyStateLifetime: tstime.GoDuration{Duration: s.steadyStateLifetime}, }, nil } + +// extractClientInfo constructs a [status.ClientInfo] for one of the two peer +// relay clients involved in this session. +func extractClientInfo(idx int, ep *serverEndpoint) status.ClientInfo { + if idx != 0 && idx != 1 { + panic(fmt.Sprintf("idx passed to extractClientInfo() must be 0 or 1; got %d", idx)) + } + + return status.ClientInfo{ + Endpoint: ep.boundAddrPorts[idx], + ShortDisco: ep.discoPubKeys.Get()[idx].ShortString(), + PacketsTx: ep.packetsRx[idx], + BytesTx: ep.bytesRx[idx], + } +} + +// GetSessions returns a slice of peer relay session statuses, with each +// entry containing detailed info about the server and clients involved in +// each session. This information is intended for debugging/status UX, and +// should not be relied on for any purpose outside of that. +func (s *Server) GetSessions() []status.ServerSession { + s.mu.Lock() + defer s.mu.Unlock() + if s.closed { + return nil + } + var sessions = make([]status.ServerSession, 0, len(s.byDisco)) + for _, se := range s.byDisco { + c1 := extractClientInfo(0, se) + c2 := extractClientInfo(1, se) + sessions = append(sessions, status.ServerSession{ + VNI: se.vni, + Client1: c1, + Client2: c2, + }) + } + return sessions +} diff --git a/net/udprelay/status/status.go b/net/udprelay/status/status.go new file mode 100644 index 000000000..3866efada --- /dev/null +++ b/net/udprelay/status/status.go @@ -0,0 +1,75 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +// Package status contains types relating to the status of peer relay sessions +// between peer relay client nodes via a peer relay server. +package status + +import ( + "net/netip" +) + +// ServerStatus contains the listening UDP port and active sessions (if any) for +// this node's peer relay server at a point in time. +type ServerStatus struct { + // UDPPort is the UDP port number that the peer relay server forwards over, + // as configured by the user with 'tailscale set --relay-server-port='. + // If the port has not been configured, UDPPort will be nil. + UDPPort *int + // Sessions is a slice of detailed status information about each peer + // relay session that this node's peer relay server is involved with. It + // may be empty. + Sessions []ServerSession +} + +// ClientInfo contains status-related information about a single peer relay +// client involved in a single peer relay session. +type ClientInfo struct { + // Endpoint is the [netip.AddrPort] of this peer relay client's underlay + // endpoint participating in the session, or a zero value if the client + // has not completed a handshake. + Endpoint netip.AddrPort + // ShortDisco is a string representation of this peer relay client's disco + // public key. + // + // TODO: disco keys are pretty meaningless to end users, and they are also + // ephemeral. We really need node keys (or translation to first ts addr), + // but those are not fully plumbed into the [udprelay.Server]. Disco keys + // can also be ambiguous to a node key, but we could add node key into a + // [disco.AllocateUDPRelayEndpointRequest] in similar fashion to + // [disco.Ping]. There's also the problem of netmap trimming, where we + // can't verify a node key maps to a disco key. + ShortDisco string + // PacketsTx is the number of packets this peer relay client has sent to + // the other client via the relay server after completing a handshake. This + // is identical to the number of packets that the peer relay server has + // received from this client. + PacketsTx uint64 + // BytesTx is the total overlay bytes this peer relay client has sent to + // the other client via the relay server after completing a handshake. This + // is identical to the total overlay bytes that the peer relay server has + // received from this client. + BytesTx uint64 +} + +// ServerSession contains status information for a single session between two +// peer relay clients, which are relayed via one peer relay server. This is the +// status as seen by the peer relay server; each client node may have a +// different view of the session's current status based on connectivity and +// where the client is in the peer relay endpoint setup (allocation, binding, +// pinging, active). +type ServerSession struct { + // VNI is the Virtual Network Identifier for this peer relay session, which + // comes from the Geneve header and is unique to this session. + VNI uint32 + // Client1 contains status information about one of the two peer relay + // clients involved in this session. Note that 'Client1' does NOT mean this + // was/wasn't the allocating client, or the first client to bind, etc; this + // is just one client of two. + Client1 ClientInfo + // Client2 contains status information about one of the two peer relay + // clients involved in this session. Note that 'Client2' does NOT mean this + // was/wasn't the allocating client, or the second client to bind, etc; this + // is just one client of two. + Client2 ClientInfo +} diff --git a/tsnet/depaware.txt b/tsnet/depaware.txt index de9e69f9c..619183a60 100644 --- a/tsnet/depaware.txt +++ b/tsnet/depaware.txt @@ -310,6 +310,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware) 💣 tailscale.com/net/tshttpproxy from tailscale.com/clientupdate/distsign+ tailscale.com/net/tstun from tailscale.com/tsd+ tailscale.com/net/udprelay/endpoint from tailscale.com/wgengine/magicsock + tailscale.com/net/udprelay/status from tailscale.com/client/local tailscale.com/omit from tailscale.com/ipn/conffile tailscale.com/paths from tailscale.com/client/local+ 💣 tailscale.com/portlist from tailscale.com/ipn/ipnlocal diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index 6e0dc87eb..136004bc8 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -38,6 +38,7 @@ import ( "tailscale.com/ipn" "tailscale.com/net/tsaddr" "tailscale.com/net/tstun" + "tailscale.com/net/udprelay/status" "tailscale.com/tailcfg" "tailscale.com/tstest" "tailscale.com/tstest/integration/testcontrol" @@ -1526,7 +1527,8 @@ func TestEncryptStateMigration(t *testing.T) { // TestPeerRelayPing creates three nodes with one acting as a peer relay. // The test succeeds when "tailscale ping" flows through the peer -// relay between all 3 nodes. +// relay between all 3 nodes, and "tailscale debug peer-relay-sessions" returns +// expected values. func TestPeerRelayPing(t *testing.T) { tstest.Shard(t) tstest.Parallel(t) @@ -1624,6 +1626,47 @@ func TestPeerRelayPing(t *testing.T) { t.Fatal(err) } } + + allControlNodes := env.Control.AllNodes() + wantSessionsForDiscoShorts := make(set.Set[[2]string]) + for i, a := range allControlNodes { + if i == len(allControlNodes)-1 { + break + } + for _, z := range allControlNodes[i+1:] { + wantSessionsForDiscoShorts.Add([2]string{a.DiscoKey.ShortString(), z.DiscoKey.ShortString()}) + } + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + debugSessions, err := peerRelay.LocalClient().DebugPeerRelaySessions(ctx) + cancel() + if err != nil { + t.Fatalf("debug peer-relay-sessions failed: %v", err) + } + if len(debugSessions.Sessions) != len(wantSessionsForDiscoShorts) { + t.Errorf("got %d peer relay sessions, want %d", len(debugSessions.Sessions), len(wantSessionsForDiscoShorts)) + } + for _, session := range debugSessions.Sessions { + if !wantSessionsForDiscoShorts.Contains([2]string{session.Client1.ShortDisco, session.Client2.ShortDisco}) && + !wantSessionsForDiscoShorts.Contains([2]string{session.Client2.ShortDisco, session.Client1.ShortDisco}) { + t.Errorf("peer relay session for disco keys %s<->%s not found in debug peer-relay-sessions: %+v", session.Client1.ShortDisco, session.Client2.ShortDisco, debugSessions.Sessions) + } + for _, client := range []status.ClientInfo{session.Client1, session.Client2} { + if client.BytesTx == 0 { + t.Errorf("unexpected 0 bytes TX counter in peer relay session: %+v", session) + } + if client.PacketsTx == 0 { + t.Errorf("unexpected 0 packets TX counter in peer relay session: %+v", session) + } + if !client.Endpoint.IsValid() { + t.Errorf("unexpected endpoint zero value in peer relay session: %+v", session) + } + if len(client.ShortDisco) == 0 { + t.Errorf("unexpected zero len short disco in peer relay session: %+v", session) + } + } + } } func TestC2NDebugNetmap(t *testing.T) {