Sending a ping of a larger MTU works and updates the endpoint state

with the highest mtu.

Signed-off-by: Val <valerie@tailscale.com>
This commit is contained in:
Val 2023-07-10 22:54:05 +02:00 committed by salman
parent 69bdf2f2c5
commit 0cdb85dff0
13 changed files with 208 additions and 60 deletions

View File

@ -809,10 +809,12 @@ func (lc *LocalClient) ExpandSNIName(ctx context.Context, name string) (fqdn str
// Ping sends a ping of the provided type to the provided IP and waits
// for its response.
func (lc *LocalClient) Ping(ctx context.Context, ip netip.Addr, pingtype tailcfg.PingType) (*ipnstate.PingResult, error) {
func (lc *LocalClient) Ping(ctx context.Context, ip netip.Addr, pingtype tailcfg.PingType, mtu int) (*ipnstate.PingResult, error) {
v := url.Values{}
v.Set("ip", ip.String())
v.Set("mtu", strconv.Itoa(mtu))
v.Set("type", string(pingtype))
// XXX new api or whatnot
body, err := lc.send(ctx, "POST", "/localapi/v0/ping?"+v.Encode(), 200, nil)
if err != nil {
return nil, fmt.Errorf("error %w: %s", err, body)

View File

@ -53,12 +53,14 @@ relay node.
fs.BoolVar(&pingArgs.peerAPI, "peerapi", false, "try hitting the peer's peerapi HTTP server")
fs.IntVar(&pingArgs.num, "c", 10, "max number of pings to send. 0 for infinity.")
fs.DurationVar(&pingArgs.timeout, "timeout", 5*time.Second, "timeout before giving up on a ping")
fs.IntVar(&pingArgs.mtu, "mtu", 0, "send a packet with this many bytes total")
return fs
})(),
}
var pingArgs struct {
num int
mtu int
untilDirect bool
verbose bool
tsmp bool
@ -115,7 +117,7 @@ func runPing(ctx context.Context, args []string) error {
for {
n++
ctx, cancel := context.WithTimeout(ctx, pingArgs.timeout)
pr, err := localClient.Ping(ctx, netip.MustParseAddr(ip), pingType())
pr, err := localClient.Ping(ctx, netip.MustParseAddr(ip), pingType(), pingArgs.mtu)
cancel()
if err != nil {
if errors.Is(err, context.DeadlineExceeded) {

View File

@ -170,7 +170,7 @@ type ControlDialPlanner interface {
// Pinger is the LocalBackend.Ping method.
type Pinger interface {
// Ping is a request to do a ping with the peer handling the given IP.
Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType) (*ipnstate.PingResult, error)
Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, mtu int) (*ipnstate.PingResult, error)
}
type Decompressor interface {
@ -1670,7 +1670,7 @@ func doPingerPing(logf logger.Logf, c *http.Client, pr *tailcfg.PingRequest, pin
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
res, err := pinger.Ping(ctx, pr.IP, pingType)
res, err := pinger.Ping(ctx, pr.IP, pingType, 0)
if err != nil {
d := time.Since(start).Round(time.Millisecond)
logf("doPingerPing: ping error of type %q to %v after %v: %v", pingType, pr.IP, d, err)

View File

@ -2395,7 +2395,7 @@ func (b *LocalBackend) StartLoginInteractive() {
}
}
func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType) (*ipnstate.PingResult, error) {
func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, mtu int) (*ipnstate.PingResult, error) {
if pingType == tailcfg.PingPeerAPI {
t0 := time.Now()
node, base, err := b.pingPeerAPI(ctx, ip)
@ -2423,7 +2423,7 @@ func (b *LocalBackend) Ping(ctx context.Context, ip netip.Addr, pingType tailcfg
case ch <- pr:
default:
}
})
}, mtu)
select {
case pr := <-ch:
return pr, nil

View File

@ -1335,7 +1335,17 @@ func (h *Handler) servePing(w http.ResponseWriter, r *http.Request) {
http.Error(w, "missing 'type' parameter", 400)
return
}
res, err := h.b.Ping(ctx, ip, tailcfg.PingType(pingTypeStr))
mtuStr := r.FormValue("mtu")
if mtuStr == "" {
// XXX old api didn't include this arg
mtuStr = "0"
}
mtu, err := strconv.Atoi(mtuStr)
if err != nil {
http.Error(w, "invalid 'mtu' parameter", 400)
return
}
res, err := h.b.Ping(ctx, ip, tailcfg.PingType(pingTypeStr), mtu)
if err != nil {
writeErrorJSON(w, err)
return

View File

@ -289,6 +289,9 @@ type State struct {
// PAC is the URL to the Proxy Autoconfig URL, if applicable.
PAC string
// MaxMTU is the largest MTU of the available usable interfaces
MaxMTU int
}
func (s *State) String() string {
@ -524,6 +527,9 @@ func GetState() (*State, error) {
}
s.HaveV6 = s.HaveV6 || isUsableV6(pfx.Addr())
s.HaveV4 = s.HaveV4 || isUsableV4(pfx.Addr())
if ni.MTU > s.MaxMTU {
s.MaxMTU = ni.MTU
}
}
}); err != nil {
return nil, err

View File

@ -239,7 +239,7 @@ func TestConn(t *testing.T) {
}
// ping to make sure the connection is up.
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP)
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP, 0)
if err != nil {
t.Fatal(err)
}

View File

@ -1007,7 +1007,7 @@ func (c *Conn) LastRecvActivityOfNodeKey(nk key.NodePublic) string {
}
// Ping handles a "tailscale ping" CLI query.
func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnstate.PingResult), mtu int) {
c.mu.Lock()
defer c.mu.Unlock()
if c.privateKey.IsZero() {
@ -1031,7 +1031,7 @@ func (c *Conn) Ping(peer *tailcfg.Node, res *ipnstate.PingResult, cb func(*ipnst
cb(res)
return
}
ep.cliPing(res, cb)
ep.cliPing(res, cb, mtu)
}
// c.mu must be held
@ -2056,13 +2056,45 @@ const (
// speeds.
var debugIPv4DiscoPingPenalty = envknob.RegisterDuration("TS_DISCO_PONG_IPV4_DELAY")
// Peer MTU notes:
//
// The general concept is to send ~5 different sizes of ping/pong
// packets between clients. The largest one that comes back defines
// our MTU for communicating with this peer via this endpoint. This is
// accomplished by teaching betterAddr() about pong packet length and
// having it prefer the larger mtu for the same addr/port
// endpoint. This prototype adds a wantMTU parameter to the relevant
// disco routines because it was easy.
//
// TODO: Actually use the MTU when sending packets to our peer via
// this endpoint.
//
// TODO: Do the math to convert MTU <-> the wantMTU parameter, if
// necessary. I think it is necessary?
// usefulMtus are the set of likely on-the-wire MTUs (including all the
// layers of protocal headers above link layer)
//
// Each ping is kicked off with a separate go routine and they seem to
// be run in approximately LIFO order, so if we want the biggest ping
// to arrive first put it last?
var usefulMtus = [...]int{
// 576, // Smallest MTU for IPv4, probably useless?
// 1124, // An observed max mtu in the wild, maybe 1100 instead?
1280, // Smallest MTU for IPv6, current default
1400, // A little less, for tunnels or such
1500, // Most common real world MTU
// 8000, // Some jumbo frames are this size
9000, // Most jumbo frames are this size or slightly larger
}
// sendDiscoMessage sends discovery message m to dstDisco at dst.
//
// If dst is a DERP IP:port, then dstKey must be non-zero.
//
// The dstKey should only be non-zero if the dstDisco key
// unambiguously maps to exactly one peer.
func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDisco key.DiscoPublic, m disco.Message, logLevel discoLogLevel) (sent bool, err error) {
func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDisco key.DiscoPublic, m disco.Message, logLevel discoLogLevel, mtu int) (sent bool, err error) {
isDERP := dst.Addr() == derpMagicIPAddr
if _, isPong := m.(*disco.Pong); isPong && !isDERP && dst.Addr().Is4() {
time.Sleep(debugIPv4DiscoPingPenalty())
@ -2077,7 +2109,12 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDi
if _, err := crand.Read(nonce[:]); err != nil {
panic(err) // worth dying for
}
pkt := make([]byte, 0, 512) // TODO: size it correctly? pool? if it matters.
bufSize := 512
if mtu != 0 {
bufSize = mtu
}
pkt := make([]byte, 0, bufSize)
pkt = append(pkt, disco.Magic...)
pkt = c.discoPublic.AppendTo(pkt)
di := c.discoInfoLocked(dstDisco)
@ -2089,8 +2126,24 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDi
metricSendDiscoUDP.Add(1)
}
box := di.sharedKey.Seal(m.AppendMarshal(nil))
// pm needs to be padded out to hit our mtu goal
//
// IP adds 20/40 bytes to the packet
//
// Disco header adds ??? bytes
//
// Seal adds 40 bytes to the input
pm := m.AppendMarshal(nil)
// Now extend it to the mtu minus all the headers
var pad int
if mtu != 0 {
pad = mtu - 40 - 40 - len(pkt) - len(pm)
pm = append(pm, make([]byte, pad, pad)...)
}
box := di.sharedKey.Seal(pm)
//c.logf("pkt %v mtu %v pad %v pm %v box %v", len(pkt), mtu, pad, len(pm), len(box))
pkt = append(pkt, box...)
//c.logf("pkt %v", len(pkt))
sent, err = c.sendAddr(dst, dstKey, pkt)
if sent {
if logLevel == discoLog || (logLevel == discoVerboseLog && debugDisco()) {
@ -2098,7 +2151,7 @@ func (c *Conn) sendDiscoMessage(dst netip.AddrPort, dstKey key.NodePublic, dstDi
if !dstKey.IsZero() {
node = dstKey.ShortString()
}
c.dlogf("[v1] magicsock: disco: %v->%v (%v, %v) sent %v", c.discoShort, dstDisco.ShortString(), node, derpStr(dst.String()), disco.MessageSummary(m))
c.logf("magicsock: disco: %v->%v (%v, %v) sent %v len %v\n", c.discoShort, dstDisco.ShortString(), node, derpStr(dst.String()), disco.MessageSummary(m), len(pkt))
}
if isDERP {
metricSentDiscoDERP.Add(1)
@ -2168,6 +2221,7 @@ const (
// - senderDiscoPubKey [32]byte
// - nonce [24]byte
// - naclbox of payload (see tailscale.com/disco package for inner payload format)
// - optionally, zero padding to test out the MTU
//
// For messages received over DERP, the src.Addr() will be derpMagicIP (with
// src.Port() being the region ID) and the derpNodeSrc will be the node key
@ -2175,10 +2229,10 @@ const (
// over UDP.
func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc key.NodePublic, via discoRXPath) (isDiscoMsg bool) {
const headerLen = len(disco.Magic) + key.DiscoPublicRawLen
if len(msg) < headerLen || string(msg[:len(disco.Magic)]) != disco.Magic {
msgLen := len(msg)
if msgLen < headerLen || string(msg[:len(disco.Magic)]) != disco.Magic {
return false
}
// If the first four parts are the prefix of disco.Magic
// (0x5453f09f) then it's definitely not a valid WireGuard
// packet (which starts with little-endian uint32 1, 2, 3, 4).
@ -2194,7 +2248,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
return
}
if debugDisco() {
c.logf("magicsock: disco: got disco-looking frame from %v via %s", sender.ShortString(), via)
c.logf("magicsock: disco: got disco-looking frame from %v via %s len %v", sender.ShortString(), via, msgLen)
}
if c.privateKey.IsZero() {
// Ignore disco messages when we're stopped.
@ -2267,14 +2321,14 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke
switch dm := dm.(type) {
case *disco.Ping:
metricRecvDiscoPing.Add(1)
c.handlePingLocked(dm, src, di, derpNodeSrc)
c.handlePingLocked(dm, src, di, derpNodeSrc, msgLen)
case *disco.Pong:
metricRecvDiscoPong.Add(1)
// There might be multiple nodes for the sender's DiscoKey.
// Ask each to handle it, stopping once one reports that
// the Pong's TxID was theirs.
c.peerMap.forEachEndpointWithDiscoKey(sender, func(ep *endpoint) (keepGoing bool) {
if ep.handlePongConnLocked(dm, di, src) {
if ep.handlePongConnLocked(dm, di, src, msgLen) {
return false
}
return true
@ -2351,7 +2405,8 @@ func (c *Conn) unambiguousNodeKeyOfPingLocked(dm *disco.Ping, dk key.DiscoPublic
// di is the discoInfo of the source of the ping.
// derpNodeSrc is non-zero if the ping arrived via DERP.
func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInfo, derpNodeSrc key.NodePublic) {
// msgLen is the length of the disco message
func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInfo, derpNodeSrc key.NodePublic, msgLen int) {
likelyHeartBeat := src == di.lastPingFrom && time.Since(di.lastPingTime) < 5*time.Second
di.lastPingFrom = src
di.lastPingTime = time.Now()
@ -2380,16 +2435,25 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInf
// Remember this route if not present.
var numNodes int
var dup bool
// adjust mtu for ip header
// XXX assuming ipv6
var mtu int
// If the msgLen is the size of the bare pong packet, ignore it
// XXX make this much less terrible
if msgLen > 124 {
c.logf("adding 40 bytes to msgLen %v", msgLen)
mtu = msgLen + 40
}
if isDerp {
if ep, ok := c.peerMap.endpointForNodeKey(derpNodeSrc); ok {
if ep.addCandidateEndpoint(src, dm.TxID) {
if ep.addCandidateEndpoint(src, dm.TxID, 0) {
return
}
numNodes = 1
}
} else {
c.peerMap.forEachEndpointWithDiscoKey(di.discoKey, func(ep *endpoint) (keepGoing bool) {
if ep.addCandidateEndpoint(src, dm.TxID) {
if ep.addCandidateEndpoint(src, dm.TxID, mtu) {
dup = true
return false
}
@ -2427,7 +2491,7 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInf
go c.sendDiscoMessage(ipDst, dstKey, discoDest, &disco.Pong{
TxID: dm.TxID,
Src: src,
}, discoVerboseLog)
}, discoVerboseLog, mtu)
}
// enqueueCallMeMaybe schedules a send of disco.CallMeMaybe to de via derpAddr
@ -2469,12 +2533,12 @@ func (c *Conn) enqueueCallMeMaybe(derpAddr netip.AddrPort, de *endpoint) {
for _, ep := range c.lastEndpoints {
eps = append(eps, ep.Addr)
}
go de.c.sendDiscoMessage(derpAddr, de.publicKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
go de.c.sendDiscoMessage(derpAddr, de.publicKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog, 0)
if debugSendCallMeUnknownPeer() {
// Send a callMeMaybe packet to a non-existent peer
unknownKey := key.NewNode().Public()
c.logf("magicsock: sending CallMeMaybe to unknown peer per TS_DEBUG_SEND_CALLME_UNKNOWN_PEER")
go de.c.sendDiscoMessage(derpAddr, unknownKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog)
go de.c.sendDiscoMessage(derpAddr, unknownKey, epDisco.key, &disco.CallMeMaybe{MyNumber: eps}, discoLog, 0)
}
}
@ -4141,7 +4205,7 @@ type endpoint struct {
lastFullPing mono.Time // last time we pinged all disco endpoints
derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
bestAddr addrLatency // best non-DERP path; zero if none
bestAddr addrQuality // best non-DERP path; zero if none
bestAddrAt mono.Time // time best address re-confirmed
trustBestAddrUntil mono.Time // time when bestAddr expires
sentPing map[stun.TxID]sentPing
@ -4249,6 +4313,7 @@ type endpointState struct {
recentPong uint16 // index into recentPongs of most recent; older before, wrapped
index int16 // index in nodecfg.Node.Endpoints; meaningless if lastGotPing non-zero
mtu int // mtu of connection to this endpoint
}
// indexSentinelDeleted is the temporary value that endpointState.index takes while
@ -4291,11 +4356,13 @@ func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) {
What: "deleteEndpointLocked-bestAddr-" + why,
From: de.bestAddr,
})
de.bestAddr = addrLatency{}
de.bestAddr = addrQuality{}
}
}
// pongHistoryCount is how many pongReply values we keep per endpointState
// XXX should maybe make this bigger for mtu stuff
// const pongHistoryCount = 64 * len(usefulMtus)
const pongHistoryCount = 64
type pongReply struct {
@ -4303,6 +4370,7 @@ type pongReply struct {
pongAt mono.Time // when we received the pong
from netip.AddrPort // the pong's src (usually same as endpoint map key)
pongSrc netip.AddrPort // what they reported they heard
mtu int // total packet size including IP header
}
type sentPing struct {
@ -4310,6 +4378,7 @@ type sentPing struct {
at mono.Time
timer *time.Timer // timeout timer
purpose discoPingPurpose
mtu int // total packet size including IP header
}
// initFakeUDPAddr populates fakeWGAddr with a globally unique fake UDPAddr.
@ -4510,7 +4579,7 @@ func (de *endpoint) noteActiveLocked() {
// cliPing starts a ping for the "tailscale ping" command. res is value to call cb with,
// already partially filled.
func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResult)) {
func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResult), mtu int) {
de.mu.Lock()
defer de.mu.Unlock()
@ -4532,10 +4601,16 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResu
// Otherwise "tailscale ping" results to a node on the local network
// can look like they're bouncing between, say 10.0.0.0/9 and the peer's
// IPv6 address, both 1ms away, and it's random who replies first.
de.startDiscoPingLocked(udpAddr, now, pingCLI)
de.startDiscoPingLockedMTU(udpAddr, now, pingCLI, mtu)
} else {
for ep := range de.endpointState {
de.startDiscoPingLocked(ep, now, pingCLI)
if mtu == 0 {
for _, testMtu := range usefulMtus {
de.startDiscoPingLockedMTU(ep, now, pingDiscovery, testMtu)
}
} else {
de.startDiscoPingLockedMTU(ep, now, pingCLI, mtu)
}
}
}
de.noteActiveLocked()
@ -4647,11 +4722,11 @@ func (de *endpoint) removeSentDiscoPingLocked(txid stun.TxID, sp sentPing) {
//
// The caller should use de.discoKey as the discoKey argument.
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, logLevel discoLogLevel) {
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, logLevel discoLogLevel, mtu int) {
sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{
TxID: [12]byte(txid),
NodeKey: de.c.publicKeyAtomic.Load(),
}, logLevel)
}, logLevel, mtu)
if !sent {
de.forgetDiscoPing(txid)
}
@ -4676,6 +4751,10 @@ const (
)
func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose) {
de.startDiscoPingLockedMTU(ep, now, purpose, 0)
}
func (de *endpoint) startDiscoPingLockedMTU(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, mtu int) {
if runtime.GOOS == "js" {
return
}
@ -4700,12 +4779,13 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
at: now,
timer: time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }),
purpose: purpose,
mtu: mtu,
}
logLevel := discoLog
if purpose == pingHeartbeat {
logLevel = discoVerboseLog
}
go de.sendDiscoPing(ep, epDisco.key, txid, logLevel)
go de.sendDiscoPing(ep, epDisco.key, txid, logLevel, mtu)
}
func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
@ -4729,8 +4809,20 @@ func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
if firstPing && sendCallMeMaybe {
de.c.dlogf("[v1] magicsock: disco: send, starting discovery for %v (%v)", de.publicKey.ShortString(), de.discoShort())
}
de.startDiscoPingLocked(ep, now, pingDiscovery)
// Send a bouquet of pings in different sizes to probe
// peer mtu, but only if this is not a derp addr.
if de.derpAddr.IsValid() {
de.startDiscoPingLocked(ep, now, pingDiscovery)
} else {
for _, mtu := range usefulMtus {
// XXX only send pings less than the mtu of the interface used to reach this endpoint
de.startDiscoPingLockedMTU(ep, now, pingDiscovery, mtu)
// XXX Would be nice to pause a bit to
// give the largest mtu an advantage
// in returning first, but we're
// holding a lock here.
}
}
}
derpAddr := de.derpAddr
if sentAny && sendCallMeMaybe && derpAddr.IsValid() {
@ -4932,15 +5024,23 @@ func (de *endpoint) updateFromNode(n *tailcfg.Node, heartbeatDisabled bool) {
//
// This is called once we've already verified that we got a valid
// discovery message from de via ep.
func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.TxID) (duplicatePing bool) {
func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.TxID, mtu int) (duplicatePing bool) {
de.mu.Lock()
defer de.mu.Unlock()
// lookup the current end point state
if st, ok := de.endpointState[ep]; ok {
// oh, we already have this endpoint in our list!
// check to see if this is a resend of the same ping as the one we got most recently
duplicatePing = forRxPingTxID == st.lastGotPingTxID
if !duplicatePing {
st.lastGotPingTxID = forRxPingTxID
}
// If this ping has a higher mtu, update it in the endpoint
if mtu > st.mtu {
de.c.logf("UPDATING mtu to %v", mtu)
st.mtu = mtu
}
if st.lastGotPing.IsZero() {
// Already-known endpoint from the network map.
return duplicatePing
@ -4950,10 +5050,12 @@ func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.T
}
// Newly discovered endpoint. Exciting!
de.c.dlogf("[v1] magicsock: disco: adding %v as candidate endpoint for %v (%s)", ep, de.discoShort(), de.publicKey.ShortString())
de.c.dlogf("[v1] magicsock: disco: adding %v as candidate endpoint for %v (%s) mtu %v", ep, de.discoShort(), de.publicKey.ShortString(), mtu)
// Currently this adds a separate endpoint for each test MTU, would be better to update in place
de.endpointState[ep] = &endpointState{
lastGotPing: time.Now(),
lastGotPingTxID: forRxPingTxID,
mtu: mtu,
}
// If for some reason this gets very large, do some cleanup.
@ -4983,7 +5085,7 @@ func (de *endpoint) noteConnectivityChange() {
// It should be called with the Conn.mu held.
//
// It reports whether m.TxID corresponds to a ping that this endpoint sent.
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort) (knownTxID bool) {
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort, msgLen int) (knownTxID bool) {
de.mu.Lock()
defer de.mu.Unlock()
@ -4995,6 +5097,19 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
return false
}
knownTxID = true // for naked returns below
// adjust mtu for ip header
// XXX assuming ipv6
var mtu int
// If the msgLen is the size of the bare pong packet, ignore it
// XXX make this much less terrible
if msgLen > 124 {
de.c.logf("adding 40 bytes to msgLen %v", msgLen)
mtu = msgLen + 40
}
// The mtu of the returned pong should be the same as the sent ping
if sp.mtu != mtu {
de.c.logf("error! pong mtu %v does not match ping mtu %v", mtu, sp.mtu)
}
de.removeSentDiscoPingLocked(m.TxID, sp)
now := mono.Now()
@ -5014,15 +5129,16 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
pongAt: now,
from: src,
pongSrc: m.Src,
mtu: mtu,
})
}
if sp.purpose != pingHeartbeat {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pong.src=%v%v mtu %v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
if sp.to != src {
fmt.Fprintf(bw, " ping.to=%v", sp.to)
}
}))
}), mtu)
}
for _, pp := range de.pendingCLIPings {
@ -5034,9 +5150,9 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
// Promote this pong response to our current best address if it's lower latency.
// TODO(bradfitz): decide how latency vs. preference order affects decision
if !isDerp {
thisPong := addrLatency{sp.to, latency}
thisPong := addrQuality{sp.to, latency, mtu}
if betterAddr(thisPong, de.bestAddr) {
de.c.logf("magicsock: disco: node %v %v now using %v", de.publicKey.ShortString(), de.discoShort(), sp.to)
de.c.logf("UPDATING MTU magicsock: disco: node %v %v now using %v mtu %v", de.publicKey.ShortString(), de.discoShort(), sp.to, mtu)
de.debugUpdates.Add(EndpointChange{
When: time.Now(),
What: "handlePingLocked-bestAddr-update",
@ -5074,19 +5190,23 @@ func portableTrySetSocketBuffer(pconn nettype.PacketConn, logf logger.Logf) {
}
}
// addrLatency is an IPPort with an associated latency.
type addrLatency struct {
// addrQuality is an IPPort with an associated latency and MTU.
type addrQuality struct {
netip.AddrPort
latency time.Duration
mtu int
}
func (a addrLatency) String() string {
return a.AddrPort.String() + "@" + a.latency.String()
func (a addrQuality) String() string {
return a.AddrPort.String() + "@" + a.latency.String() + "+" + strconv.Itoa(a.mtu)
}
// betterAddr reports whether a is a better addr to use than b.
func betterAddr(a, b addrLatency) bool {
func betterAddr(a, b addrQuality) bool {
if a.AddrPort == b.AddrPort {
if a.mtu > b.mtu {
return true
}
return false
}
if !b.IsValid() {
@ -5282,7 +5402,7 @@ func (de *endpoint) stopAndReset() {
func (de *endpoint) resetLocked() {
de.lastSend = 0
de.lastFullPing = 0
de.bestAddr = addrLatency{}
de.bestAddr = addrQuality{}
de.bestAddrAt = 0
de.trustBestAddrUntil = 0
for _, es := range de.endpointState {

View File

@ -1627,10 +1627,13 @@ func TestEndpointSetsEqual(t *testing.T) {
func TestBetterAddr(t *testing.T) {
const ms = time.Millisecond
al := func(ipps string, d time.Duration) addrLatency {
return addrLatency{netip.MustParseAddrPort(ipps), d}
al := func(ipps string, d time.Duration) addrQuality {
return addrQuality{AddrPort: netip.MustParseAddrPort(ipps), latency: d, mtu: 0}
}
zero := addrLatency{}
almtu := func(ipps string, d time.Duration, mtu int) addrQuality {
return addrQuality{AddrPort: netip.MustParseAddrPort(ipps), latency: d, mtu: mtu}
}
zero := addrQuality{}
const (
publicV4 = "1.2.3.4:555"
@ -1641,7 +1644,7 @@ func TestBetterAddr(t *testing.T) {
)
tests := []struct {
a, b addrLatency
a, b addrQuality
want bool // whether a is better than b
}{
{a: zero, b: zero, want: false},
@ -1703,7 +1706,12 @@ func TestBetterAddr(t *testing.T) {
b: al(publicV6, 100*ms),
want: true,
},
// If addresses are equal, prefer larger MTU
{
a: almtu(publicV4, 30*ms, 1500),
b: almtu(publicV4, 30*ms, 0),
want: true,
},
// Private IPs are preferred over public IPs even if the public
// IP is IPv6.
{

View File

@ -150,6 +150,7 @@ const nicID = 1
// maxUDPPacketSize is the maximum size of a UDP packet we copy in startPacketCopy
// when relaying UDP packets. We don't use the 'mtu' const in anticipation of
// one day making the MTU more dynamic.
// TODO: make this bigger
const maxUDPPacketSize = 1500
// Create creates and populates a new Impl.

View File

@ -1211,7 +1211,7 @@ func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
e.magicConn.UpdateStatus(sb)
}
func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult)) {
func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult), mtu int) {
res := &ipnstate.PingResult{IP: ip.String()}
pip, ok := e.PeerForIP(ip)
if !ok {
@ -1227,11 +1227,10 @@ func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func
return
}
peer := pip.Node
e.logf("ping(%v): sending %v ping to %v %v ...", ip, pingType, peer.Key.ShortString(), peer.ComputedName)
e.logf("ping(%v): sending %v ping mtu %v to %v %v...", ip, pingType, mtu, peer.Key.ShortString(), peer.ComputedName)
switch pingType {
case "disco":
e.magicConn.Ping(peer, res, cb)
e.magicConn.Ping(peer, res, cb, mtu)
case "TSMP":
e.sendTSMPPing(ip, peer, res, cb)
case "ICMP":

View File

@ -158,8 +158,8 @@ func (e *watchdogEngine) DiscoPublicKey() (k key.DiscoPublic) {
e.watchdog("DiscoPublicKey", func() { k = e.wrap.DiscoPublicKey() })
return k
}
func (e *watchdogEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult)) {
e.watchdog("Ping", func() { e.wrap.Ping(ip, pingType, cb) })
func (e *watchdogEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult), mtu int) {
e.watchdog("Ping", func() { e.wrap.Ping(ip, pingType, cb, mtu) })
}
func (e *watchdogEngine) RegisterIPPortIdentity(ipp netip.AddrPort, tsIP netip.Addr) {
e.watchdog("RegisterIPPortIdentity", func() { e.wrap.RegisterIPPortIdentity(ipp, tsIP) })

View File

@ -152,7 +152,7 @@ type Engine interface {
// Ping is a request to start a ping with the peer handling the given IP and
// then call cb with its ping latency & method.
Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult))
Ping(ip netip.Addr, pingType tailcfg.PingType, cb func(*ipnstate.PingResult), mtu int)
// RegisterIPPortIdentity registers a given node (identified by its
// Tailscale IP) as temporarily having the given IP:port for whois lookups.