wgengine/magicsock: add relayManager event logs (#17091)

These are gated behind magicsock component debug logging.

Updates tailscale/corp#30818

Signed-off-by: Jordan Whited <jordan@tailscale.com>
This commit is contained in:
Jordan Whited 2025-09-10 12:36:53 -07:00 committed by GitHub
parent 1ec3d20d10
commit 6feb6f3c75
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 69 additions and 11 deletions

View File

@ -2411,7 +2411,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src epAddr, shouldBeRelayHandshake
msgType, sender.ShortString(), derpNodeSrc.ShortString()) msgType, sender.ShortString(), derpNodeSrc.ShortString())
return return
} else { } else {
c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got %s for %v<->%v", c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got %s disco[0]=%v disco[1]=%v",
c.discoShort, epDisco.short, c.discoShort, epDisco.short,
ep.publicKey.ShortString(), derpStr(src.String()), ep.publicKey.ShortString(), derpStr(src.String()),
msgType, msgType,

View File

@ -6,6 +6,7 @@ package magicsock
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"net/netip" "net/netip"
"sync" "sync"
"time" "time"
@ -78,6 +79,9 @@ type serverDiscoVNI struct {
type relayHandshakeWork struct { type relayHandshakeWork struct {
wlb endpointWithLastBest wlb endpointWithLastBest
se udprelay.ServerEndpoint se udprelay.ServerEndpoint
server candidatePeerRelay
handshakeGen uint32
// handshakeServerEndpoint() always writes to doneCh (len 1) when it // handshakeServerEndpoint() always writes to doneCh (len 1) when it
// returns. It may end up writing the same event afterward to // returns. It may end up writing the same event afterward to
@ -91,6 +95,26 @@ type relayHandshakeWork struct {
cancel context.CancelFunc cancel context.CancelFunc
} }
// dlogf emits a debug log line for this handshake work item. The caller's
// formatted message is followed by identifying context: the peer's node key,
// the relay server, the handshake generation, and both client disco keys.
//
// It does nothing unless debugLogging is set on the endpoint's conn
// (r.wlb.ep.c).
func (r *relayHandshakeWork) dlogf(format string, args ...any) {
	c := r.wlb.ep.c
	if !c.debugLogging.Load() {
		return
	}

	// With no relay server node key recorded on this work item, log a fixed
	// placeholder in its place instead of a zero key.
	relay := "from-call-me-maybe-via"
	if !r.server.nodeKey.IsZero() {
		relay = r.server.nodeKey.ShortString()
	}

	msg := fmt.Sprintf(format, args...)
	c.logf(
		"%s node=%v relay=%v handshakeGen=%d disco[0]=%v disco[1]=%v",
		msg,
		r.wlb.ep.publicKey.ShortString(),
		relay,
		r.handshakeGen,
		r.se.ClientDisco[0].ShortString(),
		r.se.ClientDisco[1].ShortString(),
	)
}
// newRelayServerEndpointEvent indicates a new [udprelay.ServerEndpoint] has // newRelayServerEndpointEvent indicates a new [udprelay.ServerEndpoint] has
// become known either via allocation with a relay server, or via // become known either via allocation with a relay server, or via
// [disco.CallMeMaybeVia] reception. This structure is immutable once // [disco.CallMeMaybeVia] reception. This structure is immutable once
@ -257,7 +281,9 @@ type relayDiscoMsgEvent struct {
type relayEndpointAllocWork struct { type relayEndpointAllocWork struct {
wlb endpointWithLastBest wlb endpointWithLastBest
discoKeys key.SortedPairOfDiscoPublic discoKeys key.SortedPairOfDiscoPublic
candidatePeerRelay candidatePeerRelay candidatePeerRelay candidatePeerRelay // zero value if learned via [disco.CallMeMaybeVia]
allocGen uint32
// allocateServerEndpoint() always writes to doneCh (len 1) when it // allocateServerEndpoint() always writes to doneCh (len 1) when it
// returns. It may end up writing the same event afterward to // returns. It may end up writing the same event afterward to
@ -271,6 +297,20 @@ type relayEndpointAllocWork struct {
cancel context.CancelFunc cancel context.CancelFunc
} }
// dlogf emits a debug log line for this allocation work item. The caller's
// formatted message is followed by identifying context: the peer's node key,
// the candidate relay's node key, the allocation generation, and both disco
// keys of the pair being allocated for.
//
// It does nothing unless debugLogging is set on the endpoint's conn
// (r.wlb.ep.c).
func (r *relayEndpointAllocWork) dlogf(format string, args ...any) {
	c := r.wlb.ep.c
	if !c.debugLogging.Load() {
		return
	}

	msg := fmt.Sprintf(format, args...)
	// Read the disco key pair once and index into the result for both fields.
	discos := r.discoKeys.Get()
	c.logf(
		"%s node=%v relay=%v allocGen=%d disco[0]=%v disco[1]=%v",
		msg,
		r.wlb.ep.publicKey.ShortString(),
		r.candidatePeerRelay.nodeKey.ShortString(),
		r.allocGen,
		discos[0].ShortString(),
		discos[1].ShortString(),
	)
}
// init initializes [relayManager] if it is not already initialized. // init initializes [relayManager] if it is not already initialized.
func (r *relayManager) init() { func (r *relayManager) init() {
r.initOnce.Do(func() { r.initOnce.Do(func() {
@ -712,6 +752,7 @@ func (r *relayManager) handleNewServerEndpointRunLoop(newServerEndpoint newRelay
work := &relayHandshakeWork{ work := &relayHandshakeWork{
wlb: newServerEndpoint.wlb, wlb: newServerEndpoint.wlb,
se: newServerEndpoint.se, se: newServerEndpoint.se,
server: newServerEndpoint.server,
rxDiscoMsgCh: make(chan relayDiscoMsgEvent), rxDiscoMsgCh: make(chan relayDiscoMsgEvent),
doneCh: make(chan relayEndpointHandshakeWorkDoneEvent, 1), doneCh: make(chan relayEndpointHandshakeWorkDoneEvent, 1),
ctx: ctx, ctx: ctx,
@ -728,8 +769,9 @@ func (r *relayManager) handleNewServerEndpointRunLoop(newServerEndpoint newRelay
if r.handshakeGeneration == 0 { // generation must be nonzero if r.handshakeGeneration == 0 { // generation must be nonzero
r.handshakeGeneration++ r.handshakeGeneration++
} }
work.handshakeGen = r.handshakeGeneration
go r.handshakeServerEndpoint(work, r.handshakeGeneration) go r.handshakeServerEndpoint(work)
} }
// sendCallMeMaybeVia sends a [disco.CallMeMaybeVia] to ep over DERP. It must be // sendCallMeMaybeVia sends a [disco.CallMeMaybeVia] to ep over DERP. It must be
@ -758,7 +800,7 @@ func (r *relayManager) sendCallMeMaybeVia(ep *endpoint, se udprelay.ServerEndpoi
ep.c.sendDiscoMessage(epAddr{ap: derpAddr}, ep.publicKey, epDisco.key, callMeMaybeVia, discoVerboseLog) ep.c.sendDiscoMessage(epAddr{ap: derpAddr}, ep.publicKey, epDisco.key, callMeMaybeVia, discoVerboseLog)
} }
func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generation uint32) { func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork) {
done := relayEndpointHandshakeWorkDoneEvent{work: work} done := relayEndpointHandshakeWorkDoneEvent{work: work}
r.ensureDiscoInfoFor(work) r.ensureDiscoInfoFor(work)
@ -777,10 +819,13 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
common := disco.BindUDPRelayEndpointCommon{ common := disco.BindUDPRelayEndpointCommon{
VNI: work.se.VNI, VNI: work.se.VNI,
Generation: generation, Generation: work.handshakeGen,
RemoteKey: epDisco.key, RemoteKey: epDisco.key,
} }
work.dlogf("[v1] magicsock: relayManager: starting handshake addrPorts=%v",
work.se.AddrPorts,
)
sentBindAny := false sentBindAny := false
bind := &disco.BindUDPRelayEndpoint{ bind := &disco.BindUDPRelayEndpoint{
BindUDPRelayEndpointCommon: common, BindUDPRelayEndpointCommon: common,
@ -848,6 +893,7 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
for { for {
select { select {
case <-work.ctx.Done(): case <-work.ctx.Done():
work.dlogf("[v1] magicsock: relayManager: handshake canceled")
return return
case msgEvent := <-work.rxDiscoMsgCh: case msgEvent := <-work.rxDiscoMsgCh:
switch msg := msgEvent.msg.(type) { switch msg := msgEvent.msg.(type) {
@ -859,12 +905,14 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
if handshakeState >= disco.BindUDPRelayHandshakeStateAnswerSent { if handshakeState >= disco.BindUDPRelayHandshakeStateAnswerSent {
continue continue
} }
work.dlogf("[v1] magicsock: relayManager: got handshake challenge from %v", msgEvent.from)
txPing(msgEvent.from, &msg.Challenge) txPing(msgEvent.from, &msg.Challenge)
handshakeState = disco.BindUDPRelayHandshakeStateAnswerSent handshakeState = disco.BindUDPRelayHandshakeStateAnswerSent
case *disco.Ping: case *disco.Ping:
if handshakeState < disco.BindUDPRelayHandshakeStateAnswerSent { if handshakeState < disco.BindUDPRelayHandshakeStateAnswerSent {
continue continue
} }
work.dlogf("[v1] magicsock: relayManager: got relayed ping from %v", msgEvent.from)
// An inbound ping from the remote peer indicates we completed a // An inbound ping from the remote peer indicates we completed a
// handshake with the relay server (our answer msg was // handshake with the relay server (our answer msg was
// received). Chances are our ping was dropped before the remote // received). Chances are our ping was dropped before the remote
@ -885,6 +933,10 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
// round-trip latency and return. // round-trip latency and return.
done.pongReceivedFrom = msgEvent.from done.pongReceivedFrom = msgEvent.from
done.latency = time.Since(at) done.latency = time.Since(at)
work.dlogf("[v1] magicsock: relayManager: got relayed pong from %v latency=%v",
msgEvent.from,
done.latency.Round(time.Millisecond),
)
return return
default: default:
// unexpected message type, silently discard // unexpected message type, silently discard
@ -892,6 +944,7 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
} }
case <-timer.C: case <-timer.C:
// The handshake timed out. // The handshake timed out.
work.dlogf("[v1] magicsock: relayManager: handshake timed out")
return return
} }
} }
@ -899,7 +952,7 @@ func (r *relayManager) handshakeServerEndpoint(work *relayHandshakeWork, generat
const allocateUDPRelayEndpointRequestTimeout = time.Second * 10 const allocateUDPRelayEndpointRequestTimeout = time.Second * 10
func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork, generation uint32) { func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork) {
done := relayEndpointAllocWorkDoneEvent{work: work} done := relayEndpointAllocWorkDoneEvent{work: work}
defer func() { defer func() {
@ -910,7 +963,7 @@ func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork, gene
dm := &disco.AllocateUDPRelayEndpointRequest{ dm := &disco.AllocateUDPRelayEndpointRequest{
ClientDisco: work.discoKeys.Get(), ClientDisco: work.discoKeys.Get(),
Generation: generation, Generation: work.allocGen,
} }
sendAllocReq := func() { sendAllocReq := func() {
@ -923,6 +976,7 @@ func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork, gene
dm, dm,
discoVerboseLog, discoVerboseLog,
) )
work.dlogf("[v1] magicsock: relayManager: sent alloc request")
} }
go sendAllocReq() go sendAllocReq()
@ -938,16 +992,19 @@ func (r *relayManager) allocateServerEndpoint(work *relayEndpointAllocWork, gene
for { for {
select { select {
case <-work.ctx.Done(): case <-work.ctx.Done():
work.dlogf("[v1] magicsock: relayManager: alloc request canceled")
return return
case <-returnAfterTimer.C: case <-returnAfterTimer.C:
work.dlogf("[v1] magicsock: relayManager: alloc request timed out")
return return
case <-retryAfterTimer.C: case <-retryAfterTimer.C:
go sendAllocReq() go sendAllocReq()
case resp := <-work.rxDiscoMsgCh: case resp := <-work.rxDiscoMsgCh:
if resp.Generation != generation || if resp.Generation != work.allocGen ||
!work.discoKeys.Equal(key.NewSortedPairOfDiscoPublic(resp.ClientDisco[0], resp.ClientDisco[1])) { !work.discoKeys.Equal(key.NewSortedPairOfDiscoPublic(resp.ClientDisco[0], resp.ClientDisco[1])) {
continue continue
} }
work.dlogf("[v1] magicsock: relayManager: got alloc response")
done.allocated = udprelay.ServerEndpoint{ done.allocated = udprelay.ServerEndpoint{
ServerDisco: resp.ServerDisco, ServerDisco: resp.ServerDisco,
ClientDisco: resp.ClientDisco, ClientDisco: resp.ClientDisco,
@ -1004,6 +1061,7 @@ func (r *relayManager) allocateAllServersRunLoop(wlb endpointWithLastBest) {
} }
byCandidatePeerRelay[v] = started byCandidatePeerRelay[v] = started
r.allocGeneration++ r.allocGeneration++
go r.allocateServerEndpoint(started, r.allocGeneration) started.allocGen = r.allocGeneration
go r.allocateServerEndpoint(started)
} }
} }