feature/routecheck,ipn/routecheck: probe reachability in the background

Refreshing the routecheck.Client would probe to generate a new
routecheck.Report, but this method was only wired up to the LocalAPI
and the `tailscale routecheck` command.

This patch adds Start and Close methods to the routecheck.Client and
starts it in the background from feature/routecheck.

This patch also introduces a new OnPeersReceived hook, which fires a
callback whenever a new network map has been received with the latest
peers. This signals to the routecheck.Client that it might need to
schedule another probe, if the shape of the routing table has changed
materially.

Updates #17366
Updates tailscale/corp#33033

Signed-off-by: Simon Law <sfllaw@tailscale.com>
This commit is contained in:
Simon Law 2026-04-23 17:44:57 -07:00
parent 8c714b2ac5
commit 905141e4ca
No known key found for this signature in database
GPG Key ID: B83D1EE07548341D
11 changed files with 187 additions and 24 deletions

View File

@ -826,7 +826,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/ipn/ipnlocal/netmapcache from tailscale.com/ipn/ipnlocal
tailscale.com/ipn/ipnstate from tailscale.com/client/local+
tailscale.com/ipn/localapi from tailscale.com/tsnet
tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck+
tailscale.com/ipn/routecheck from tailscale.com/client/local+
tailscale.com/ipn/store from tailscale.com/ipn/ipnlocal+
tailscale.com/ipn/store/kubestore from tailscale.com/cmd/k8s-operator
tailscale.com/ipn/store/mem from tailscale.com/ipn/ipnlocal+

View File

@ -328,7 +328,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/ipn/ipnstate from tailscale.com/client/local+
tailscale.com/ipn/localapi from tailscale.com/ipn/ipnserver+
tailscale.com/ipn/policy from tailscale.com/feature/portlist
tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck+
tailscale.com/ipn/routecheck from tailscale.com/client/local+
tailscale.com/ipn/store from tailscale.com/cmd/tailscaled+
L tailscale.com/ipn/store/awsstore from tailscale.com/feature/condregister
L tailscale.com/ipn/store/kubestore from tailscale.com/feature/condregister

View File

@ -245,7 +245,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar
tailscale.com/ipn/ipnlocal/netmapcache from tailscale.com/ipn/ipnlocal
tailscale.com/ipn/ipnstate from tailscale.com/client/local+
tailscale.com/ipn/localapi from tailscale.com/tsnet
tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck+
tailscale.com/ipn/routecheck from tailscale.com/client/local+
tailscale.com/ipn/store from tailscale.com/ipn/ipnlocal+
tailscale.com/ipn/store/mem from tailscale.com/ipn/ipnlocal+
tailscale.com/kube/kubetypes from tailscale.com/envknob

View File

@ -8,10 +8,12 @@
package routecheck
import (
"context"
"fmt"
"tailscale.com/ipn/ipnext"
"tailscale.com/ipn/routecheck"
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
)
@ -60,10 +62,32 @@ func (e *Extension) Init(h ipnext.Host) error {
}
e.Client = c
h.Hooks().OnPeersReceived.Add(e.onPeersReceived)
h.Hooks().OnSelfChange.Add(e.onSelfChange)
go c.Start(context.Background())
return nil
}
// Shutdown implements the [ipnext.Extension.Shutdown] interface method.
//
// It closes the routecheck client and reports whatever error Close returns,
// rather than silently discarding it.
func (e *Extension) Shutdown() error {
	return e.Client.Close()
}
// onPeersReceived is the callback registered on the OnPeersReceived hook.
// The peer list itself is not inspected; its arrival is only used as a
// trigger to consider scheduling a refresh.
func (e *Extension) onPeersReceived(_ []tailcfg.NodeView) {
	e.needsRefresh()
}
// onSelfChange is the callback registered on the OnSelfChange hook.
// The supplied view is ignored; the current self node is looked up again
// when deciding whether a refresh is wanted.
func (e *Extension) onSelfChange(_ tailcfg.NodeView) {
	e.needsRefresh()
}
// needsRefresh signals the routecheck client that a refresh may be needed,
// but only when the self node has both the client-side-reachability
// capability and its routecheck variant. Otherwise it does nothing.
func (e *Extension) needsRefresh() {
	node := e.nb.NodeBackend().Self()
	if !node.HasCap(tailcfg.NodeAttrClientSideReachability) {
		return
	}
	if !node.HasCap(tailcfg.NodeAttrClientSideReachabilityRouteCheck) {
		return
	}
	e.Client.NeedsRefresh()
}

View File

@ -375,6 +375,10 @@ type Hooks struct {
// is created. It is called with the LocalBackend locked.
NewControlClient feature.Hooks[NewControlClientCallback]
// OnPeersReceived is called (with LocalBackend.mu held) when a peer map has been received,
// whether or not it changed.
OnPeersReceived feature.Hooks[func([]tailcfg.NodeView)]
// OnSelfChange is called (with LocalBackend.mu held) when the self node
// changes, including changing to nothing (an invalid view).
OnSelfChange feature.Hooks[func(tailcfg.NodeView)]

View File

@ -6385,6 +6385,13 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) {
}
b.pauseOrResumeControlClientLocked()
if buildfeatures.HasRouteCheck {
peers := b.currentNode().Peers()
for _, f := range b.extHost.Hooks().OnPeersReceived {
f(peers)
}
}
if nm != nil {
messages := make(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage)
for id, msg := range nm.DisplayMessages {

View File

@ -76,14 +76,9 @@ func (c *Client) probe(ctx context.Context, nodes iter.Seq[probed], limit int, t
//
// This function tries both the IPv4 and IPv6 addresses
func (c *Client) Probe(ctx context.Context, nodes iter.Seq[tailcfg.NodeView], limit int, timeout time.Duration) (*Report, error) {
var canIPv4, canIPv6 bool
for _, ip := range c.nb.NodeBackend().Self().Addresses().All() {
addr := ip.Addr()
if addr.Is4() {
canIPv4 = true
} else if addr.Is6() {
canIPv6 = true
}
canIPv4, canIPv6 := supportsIPVersions(c.nb.NodeBackend().Self())
if !(canIPv4 || canIPv6) {
return nil, nil
}
var dsts iter.Seq[probed] = func(yield func(probed) bool) {
@ -188,3 +183,21 @@ func (c *Client) ping(ctx context.Context, ip netip.Addr, timeout time.Duration)
}
}
// supportsIPVersions reports which IP families node n has at least one
// address in. Both results are false when n is not a valid view.
func supportsIPVersions(n tailcfg.NodeView) (ipv4, ipv6 bool) {
	if !n.Valid() {
		return false, false
	}
	for _, pfx := range n.Addresses().All() {
		switch a := pfx.Addr(); {
		case a.Is4():
			ipv4 = true
		case a.Is6():
			ipv6 = true
		}
		if ipv4 && ipv6 {
			// Both families seen; the remaining addresses cannot change the answer.
			return true, true
		}
	}
	return ipv4, ipv6
}

View File

@ -4,7 +4,6 @@
package routecheck
import (
"context"
"encoding/json"
"maps"
"net/netip"
@ -12,22 +11,13 @@ import (
"time"
"tailscale.com/tailcfg"
"tailscale.com/util/mak"
)
// Report returns the latest reachability report.
// Returns nil if a report isn't available, which happens during initialization.
//
// NOTE(review): the body below appears to be a diff rendering that interleaves
// the removed implementation (refresh on demand) with the added one (load the
// stored report). As written, the final return is unreachable — confirm
// against the real file before relying on either behavior.
func (c *Client) Report() *Report {
nm := c.nm.NetMap()
if nm == nil {
return nil // The report wasn't available.
}
// TODO(sfllaw): Return the latest snapshot produced by background probing.
r, err := c.Refresh(context.TODO(), DefaultTimeout)
if err != nil {
c.logf("%v", err)
}
return r
return c.report.Load()
}
// Report contains the result of a single routecheck.
@ -40,6 +30,19 @@ type Report struct {
Reachable nodeset `json:"reachable"`
}
// RoutablePrefixes builds a [RoutingTable] that maps each routable network
// prefix to the routers that were reachable from the current host at the
// time the report was finished.
//
// Every node in rp.Reachable is listed under each of its Routes. The result
// is nil when no reachable node has any routes.
func (rp Report) RoutablePrefixes() RoutingTable {
	var table map[netip.Prefix][]Node
	for _, router := range rp.Reachable {
		for _, prefix := range router.Routes {
			routers := append(table[prefix], router)
			mak.Set(&table, prefix, routers)
		}
	}
	return table
}
// Node represents a node in the reachability report.
type Node struct {
ID tailcfg.NodeID `json:"id"`
@ -81,3 +84,6 @@ func (ns nodeset) UnmarshalJSON(b []byte) error {
}
return nil
}
// RoutingTable maps each network prefix to the routers ([Node] values) that
// route for it. It is the type returned by [Report.RoutablePrefixes].
type RoutingTable map[netip.Prefix][]Node

View File

@ -9,12 +9,14 @@ import (
"errors"
"fmt"
"net/netip"
"sync/atomic"
"time"
"tailscale.com/ipn/ipnstate"
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
"tailscale.com/types/netmap"
"tailscale.com/util/mak"
)
// Client generates Reports describing the result of both passive and active
@ -31,6 +33,10 @@ type Client struct {
nb NodeBackender
nm NetMapWaiter
pinger Pinger
needsRefresh chan struct{} // used to signal the need for refresh
stop context.CancelFunc
report atomic.Pointer[Report]
}
// NetMapWaiter is the interface that returns the current [netmap.NetworkMap].
@ -76,11 +82,14 @@ func NewClient(logf logger.Logf, nb NodeBackender, nm NetMapWaiter, pinger Pinge
if pinger == nil {
return nil, errors.New("Pinger must be set")
}
return &Client{
Logf: logf,
nb: nb,
nm: nm,
pinger: pinger,
needsRefresh: make(chan struct{}, 1),
}, nil
}
@ -93,3 +102,98 @@ func (c *Client) Refresh(ctx context.Context, timeout time.Duration) (*Report, e
}
return r, nil
}
// NeedsRefresh requests a [Client.Refresh] to be performed in the background.
//
// The request is a non-blocking send on the needsRefresh channel: if a
// request is already queued, this call is dropped, so bursts of calls
// coalesce and the caller never blocks.
func (c *Client) NeedsRefresh() {
	var signal struct{}
	select {
	case c.needsRefresh <- signal:
		// Queued a wake-up for the background loop.
	default:
		// A wake-up is already pending; nothing more to do.
	}
}
// Start runs the background refresh loop and blocks until it ends, so it is
// meant to be run in its own goroutine.
//
// Each signal sent by [Client.NeedsRefresh] triggers a [Client.Refresh], and
// the resulting report is stored for [Client.Report]. On the first signal for
// which a network map is available, a bootstrap report is stored first so that
// something is available while the first probe is still running.
//
// The loop ends when ctx is cancelled, when [Client.Close] invokes the cancel
// function recorded in c.stop, or when the needsRefresh channel is closed.
//
// NOTE(review): c.stop is assigned here without synchronization, so a Close
// that runs before or concurrently with this assignment may miss the cancel
// function — confirm that callers order Start before Close.
func (c *Client) Start(ctx context.Context) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel() // release the derived context however the loop exits
	c.stop = cancel

	first := true
	for {
		select {
		case <-ctx.Done():
			return
		case _, ok := <-c.needsRefresh:
			if !ok {
				// A closed channel is always ready to receive; without
				// this check the loop would spin forever.
				return
			}
			if ctx.Err() != nil {
				// select picks at random when both cases are ready;
				// don't start a probe that is already cancelled.
				return
			}
			nm := c.nm.NetMap()
			if nm == nil {
				continue // The report wasn't available.
			}

			if first {
				r := c.bootstrap(nm)
				c.report.Store(r)
				first = false
			}

			// TODO(sfllaw): Examine the shape of the overlapping
			// routers and only probe if the routing table has
			// changed sufficiently. For instance, a new router has
			// come online or a router has been removed or a set of
			// routers no longer overlap.
			r, err := c.Refresh(ctx, DefaultTimeout)
			if err != nil {
				c.logf("%v", err)
				continue
			}
			c.report.Store(r)
		}
	}
}
// bootstrap builds a provisional [Report] from the peers in nm, for use while
// waiting for the first probe to finish.
//
// Each peer is recorded as reachable at the first of its addresses whose IP
// family the self node also has an address in; peers with no such address are
// left out. It returns nil when nm is nil or when the self node has neither an
// IPv4 nor an IPv6 address.
func (c *Client) bootstrap(nm *netmap.NetworkMap) *Report {
	if nm == nil {
		return nil
	}
	haveV4, haveV6 := supportsIPVersions(c.nb.NodeBackend().Self())
	if !haveV4 && !haveV6 {
		return nil
	}

	report := new(Report)
	for _, peer := range nm.Peers {
		for _, pfx := range peer.Addresses().All() {
			addr := pfx.Addr()

			// Skip addresses in a family the self node cannot use.
			usable := true
			switch {
			case addr.Is4():
				usable = haveV4
			case addr.Is6():
				usable = haveV6
			}
			if !usable {
				continue
			}

			mak.Set(&report.Reachable, peer.ID(), Node{
				ID:     peer.ID(),
				Name:   peer.Name(),
				Addr:   addr,
				Routes: routes(peer),
			})
			break // one usable address per peer is enough
		}
	}
	report.Done = time.Now()
	return report
}
// Close stops the background loop run by [Client.Start] by invoking the
// cancel function that Start recorded. It is safe to call on a nil Client and
// safe to call more than once. It always returns nil.
//
// The needsRefresh channel is deliberately left open. Closing it would make a
// second Close panic (close of closed channel) and would make any later
// [Client.NeedsRefresh] panic (send on closed channel).
//
// NOTE(review): if Close runs before Start has recorded its cancel function,
// there is nothing to cancel and the loop is not stopped — confirm that
// callers order Start before Close.
func (c *Client) Close() error {
	if c == nil {
		return nil
	}
	if c.stop != nil {
		c.stop()
	}
	return nil
}

View File

@ -2751,6 +2751,11 @@ const (
// node is no longer online, but that is not a reliable signal.
NodeAttrClientSideReachability = "client-side-reachability"
// NodeAttrClientSideReachabilityRouteCheck configures the node to use
// the routecheck subsystem to determine reachability when choosing
// connectors. This relies on [NodeAttrClientSideReachability] being set.
NodeAttrClientSideReachabilityRouteCheck = "client-side-reachability:routecheck"
// NodeAttrDefaultAutoUpdate advertises the default node auto-update setting
// for this tailnet. The node is free to opt-in or out locally regardless of
// this value. Once this has been set and stored in the client, future

View File

@ -241,7 +241,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware)
tailscale.com/ipn/ipnlocal/netmapcache from tailscale.com/ipn/ipnlocal
tailscale.com/ipn/ipnstate from tailscale.com/client/local+
tailscale.com/ipn/localapi from tailscale.com/tsnet
tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck+
tailscale.com/ipn/routecheck from tailscale.com/client/local+
tailscale.com/ipn/store from tailscale.com/ipn/ipnlocal+
tailscale.com/ipn/store/mem from tailscale.com/ipn/ipnlocal+
tailscale.com/kube/kubetypes from tailscale.com/envknob