From 905141e4ca18600cc51f5953b4e1ba5d28ac60d5 Mon Sep 17 00:00:00 2001 From: Simon Law Date: Thu, 23 Apr 2026 17:44:57 -0700 Subject: [PATCH] feature/routecheck,ipn/routecheck: probe reachability in the background Refreshing the routecheck.Client would probe to generate a new routecheck.Report, but this method was only wired up to the LocalAPI and the `tailscale routecheck` command. This patch adds a Start and Close method to the routecheck.Client and starts it in the background from feature/routecheck. This patch also introduces a new OnPeersReceived hook, which fires a callback whenever a new network map has been received with the latest peers. This signals to the routecheck.Client that it might need to schedule another probe, if the shape of the routing table has changed materially. Updates #17366 Updates tailscale/corp#33033 Signed-off-by: Simon Law --- cmd/k8s-operator/depaware.txt | 2 +- cmd/tailscaled/depaware.txt | 2 +- cmd/tsidp/depaware.txt | 2 +- feature/routecheck/routecheck.go | 24 +++++++ ipn/ipnext/ipnext.go | 4 ++ ipn/ipnlocal/local.go | 7 +++ ipn/routecheck/probe.go | 29 ++++++--- ipn/routecheck/report.go | 30 +++++---- ipn/routecheck/routecheck.go | 104 +++++++++++++++++++++++++++++++ tailcfg/tailcfg.go | 5 ++ tsnet/depaware.txt | 2 +- 11 files changed, 187 insertions(+), 24 deletions(-) diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index 40f5870d5..298013b50 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -826,7 +826,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ tailscale.com/ipn/ipnlocal/netmapcache from tailscale.com/ipn/ipnlocal tailscale.com/ipn/ipnstate from tailscale.com/client/local+ tailscale.com/ipn/localapi from tailscale.com/tsnet - tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck+ + tailscale.com/ipn/routecheck from tailscale.com/client/local+ tailscale.com/ipn/store from tailscale.com/ipn/ipnlocal+ 
tailscale.com/ipn/store/kubestore from tailscale.com/cmd/k8s-operator tailscale.com/ipn/store/mem from tailscale.com/ipn/ipnlocal+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 33aa2b6d6..0e2cef335 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -328,7 +328,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/ipn/ipnstate from tailscale.com/client/local+ tailscale.com/ipn/localapi from tailscale.com/ipn/ipnserver+ tailscale.com/ipn/policy from tailscale.com/feature/portlist - tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck+ + tailscale.com/ipn/routecheck from tailscale.com/client/local+ tailscale.com/ipn/store from tailscale.com/cmd/tailscaled+ L tailscale.com/ipn/store/awsstore from tailscale.com/feature/condregister L tailscale.com/ipn/store/kubestore from tailscale.com/feature/condregister diff --git a/cmd/tsidp/depaware.txt b/cmd/tsidp/depaware.txt index 9357e445d..9eb506a3a 100644 --- a/cmd/tsidp/depaware.txt +++ b/cmd/tsidp/depaware.txt @@ -245,7 +245,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar tailscale.com/ipn/ipnlocal/netmapcache from tailscale.com/ipn/ipnlocal tailscale.com/ipn/ipnstate from tailscale.com/client/local+ tailscale.com/ipn/localapi from tailscale.com/tsnet - tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck+ + tailscale.com/ipn/routecheck from tailscale.com/client/local+ tailscale.com/ipn/store from tailscale.com/ipn/ipnlocal+ tailscale.com/ipn/store/mem from tailscale.com/ipn/ipnlocal+ tailscale.com/kube/kubetypes from tailscale.com/envknob diff --git a/feature/routecheck/routecheck.go b/feature/routecheck/routecheck.go index aac603a11..82eb932cc 100644 --- a/feature/routecheck/routecheck.go +++ b/feature/routecheck/routecheck.go @@ -8,10 +8,12 @@ package routecheck import ( + "context" "fmt" "tailscale.com/ipn/ipnext" "tailscale.com/ipn/routecheck" + 
"tailscale.com/tailcfg" "tailscale.com/types/logger" ) @@ -60,10 +62,32 @@ func (e *Extension) Init(h ipnext.Host) error { } e.Client = c + h.Hooks().OnPeersReceived.Add(e.onPeersReceived) + h.Hooks().OnSelfChange.Add(e.onSelfChange) + + go c.Start(context.Background()) return nil } // Shutdown implements the [ipnext.Extension.Shutdown] interface method. func (e *Extension) Shutdown() error { + e.Client.Close() return nil } + +func (e *Extension) onPeersReceived(peers []tailcfg.NodeView) { + e.needsRefresh() +} + +func (e *Extension) onSelfChange(self tailcfg.NodeView) { + e.needsRefresh() +} + +func (e *Extension) needsRefresh() { + self := e.nb.NodeBackend().Self() + if !(self.HasCap(tailcfg.NodeAttrClientSideReachability) && + self.HasCap(tailcfg.NodeAttrClientSideReachabilityRouteCheck)) { + return + } + e.Client.NeedsRefresh() +} diff --git a/ipn/ipnext/ipnext.go b/ipn/ipnext/ipnext.go index b620d8609..b5cd4d42b 100644 --- a/ipn/ipnext/ipnext.go +++ b/ipn/ipnext/ipnext.go @@ -375,6 +375,10 @@ type Hooks struct { // is created. It is called with the LocalBackend locked. NewControlClient feature.Hooks[NewControlClientCallback] + // OnPeersReceived is called (with LocalBackend.mu held) when a peer map has been received, + // whether or not it changed. + OnPeersReceived feature.Hooks[func([]tailcfg.NodeView)] + // OnSelfChange is called (with LocalBackend.mu held) when the self node // changes, including changing to nothing (an invalid view). 
OnSelfChange feature.Hooks[func(tailcfg.NodeView)] diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 6178fabc4..f260430cd 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -6385,6 +6385,13 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) { } b.pauseOrResumeControlClientLocked() + if buildfeatures.HasRouteCheck { + peers := b.currentNode().Peers() + for _, f := range b.extHost.Hooks().OnPeersReceived { + f(peers) + } + } + if nm != nil { messages := make(map[tailcfg.DisplayMessageID]tailcfg.DisplayMessage) for id, msg := range nm.DisplayMessages { diff --git a/ipn/routecheck/probe.go b/ipn/routecheck/probe.go index cf6289330..8e6514cce 100644 --- a/ipn/routecheck/probe.go +++ b/ipn/routecheck/probe.go @@ -76,14 +76,9 @@ func (c *Client) probe(ctx context.Context, nodes iter.Seq[probed], limit int, t // // This function tries both the IPv4 and IPv6 addresses func (c *Client) Probe(ctx context.Context, nodes iter.Seq[tailcfg.NodeView], limit int, timeout time.Duration) (*Report, error) { - var canIPv4, canIPv6 bool - for _, ip := range c.nb.NodeBackend().Self().Addresses().All() { - addr := ip.Addr() - if addr.Is4() { - canIPv4 = true - } else if addr.Is6() { - canIPv6 = true - } + canIPv4, canIPv6 := supportsIPVersions(c.nb.NodeBackend().Self()) + if !(canIPv4 || canIPv6) { + return nil, nil } var dsts iter.Seq[probed] = func(yield func(probed) bool) { @@ -188,3 +183,21 @@ func (c *Client) ping(ctx context.Context, ip netip.Addr, timeout time.Duration) } } + +func supportsIPVersions(n tailcfg.NodeView) (ipv4, ipv6 bool) { + if !n.Valid() { + return false, false + } + for _, ip := range n.Addresses().All() { + addr := ip.Addr() + if addr.Is4() { + ipv4 = true + } else if addr.Is6() { + ipv6 = true + } + if ipv4 && ipv6 { + break + } + } + return ipv4, ipv6 +} diff --git a/ipn/routecheck/report.go b/ipn/routecheck/report.go index 758d2be74..00272c076 100644 --- a/ipn/routecheck/report.go +++ b/ipn/routecheck/report.go @@ 
-4,7 +4,6 @@ package routecheck import ( - "context" "encoding/json" "maps" "net/netip" @@ -12,22 +11,13 @@ import ( "time" "tailscale.com/tailcfg" + "tailscale.com/util/mak" ) // Report returns the latest reachability report. // Returns nil if a report isn’t available, which happens during initialization. func (c *Client) Report() *Report { - nm := c.nm.NetMap() - if nm == nil { - return nil // The report wasn’t available. - } - - // TODO(sfllaw): Return the latest snapshot produced by background probing. - r, err := c.Refresh(context.TODO(), DefaultTimeout) - if err != nil { - c.logf("%v", err) - } - return r + return c.report.Load() } // Report contains the result of a single routecheck. @@ -40,6 +30,19 @@ type Report struct { Reachable nodeset `json:"reachable"` } +// RoutablePrefixes returns a [RoutingTable] mapping routable network prefixes +// with the associated routers that were reachable by the current host, +// at the time the report was finished. +func (rp Report) RoutablePrefixes() RoutingTable { + var out map[netip.Prefix][]Node + for _, n := range rp.Reachable { + for _, p := range n.Routes { + mak.Set(&out, p, append(out[p], n)) + } + } + return out +} + // Node represents a node in the reachability report. type Node struct { ID tailcfg.NodeID `json:"id"` @@ -81,3 +84,6 @@ func (ns nodeset) UnmarshalJSON(b []byte) error { } return nil } + +// RoutingTable is a map of routers, keyed by the network prefix for which they route. 
+type RoutingTable map[netip.Prefix][]Node diff --git a/ipn/routecheck/routecheck.go b/ipn/routecheck/routecheck.go index b3c2f1831..f789a7d43 100644 --- a/ipn/routecheck/routecheck.go +++ b/ipn/routecheck/routecheck.go @@ -9,12 +9,14 @@ import ( "errors" "fmt" "net/netip" + "sync/atomic" "time" "tailscale.com/ipn/ipnstate" "tailscale.com/tailcfg" "tailscale.com/types/logger" "tailscale.com/types/netmap" + "tailscale.com/util/mak" ) // Client generates Reports describing the result of both passive and active @@ -31,6 +33,10 @@ type Client struct { nb NodeBackender nm NetMapWaiter pinger Pinger + + needsRefresh chan struct{} // used to signal the need for refresh + stop context.CancelFunc + report atomic.Pointer[Report] } // NetMapWaiter is the interface that returns the current [netmap.NetworkMap]. @@ -76,11 +82,14 @@ func NewClient(logf logger.Logf, nb NodeBackender, nm NetMapWaiter, pinger Pinge if pinger == nil { return nil, errors.New("Pinger must be set") } + return &Client{ Logf: logf, nb: nb, nm: nm, pinger: pinger, + + needsRefresh: make(chan struct{}, 1), }, nil } @@ -93,3 +102,98 @@ func (c *Client) Refresh(ctx context.Context, timeout time.Duration) (*Report, e } return r, nil } + +// NeedsRefresh signals the need for a [Client.Refresh], which will be done in the background. +func (c *Client) NeedsRefresh() { + select { + case c.needsRefresh <- struct{}{}: + default: + } +} + +// Start runs the background refresh loop, blocking until ctx is cancelled or [Client.Close] is called; each [Client.NeedsRefresh] signal triggers a [Client.Refresh] whose result is stored for [Client.Report]. +func (c *Client) Start(ctx context.Context) { + first := true + ctx, cancel := context.WithCancel(ctx) + c.stop = cancel + for { + select { + case <-c.needsRefresh: + nm := c.nm.NetMap() + if nm == nil { + continue // No network map yet; wait for the next signal. 
+ } + + if first { + r := c.bootstrap(nm) + c.report.Store(r) + first = false + } + + // TODO(sfllaw): Examine the shape of the overlapping + // routers and only probe if the routing table has + // changed sufficiently. 
For instance, a new router has + // come online or a router has been removed or a set of + // routers no longer overlap. + + r, err := c.Refresh(ctx, DefaultTimeout) + if err != nil { + c.logf("%v", err) + continue + } + c.report.Store(r) + case <-ctx.Done(): + return + } + } +} + +// bootstrap returns a provisional [Report] that assumes every peer in nm with an address of a supported IP version is reachable, +// for use while waiting for the first probe to finish. It returns nil if nm is nil or the self node has no usable address. +func (c *Client) bootstrap(nm *netmap.NetworkMap) *Report { + if nm == nil { + return nil + } + + canIPv4, canIPv6 := supportsIPVersions(c.nb.NodeBackend().Self()) + if !(canIPv4 || canIPv6) { + return nil + } + + var r Report + for _, n := range nm.Peers { + for _, ip := range n.Addresses().All() { + // Match the IP versions + addr := ip.Addr() + if addr.Is4() && !canIPv4 { + continue + } + if addr.Is6() && !canIPv6 { + continue + } + + mak.Set(&r.Reachable, n.ID(), Node{ + ID: n.ID(), + Name: n.Name(), + Addr: addr, + Routes: routes(n), + }) + break + } + } + r.Done = time.Now() + return &r +} + +// Close stops the background loop started by [Client.Start]; it is a no-op on a nil Client and always returns nil. It closes the refresh channel, so [Client.NeedsRefresh] must not be called afterwards and Close must not be called twice. +func (c *Client) Close() error { + if c == nil { + return nil + } + + close(c.needsRefresh) + if c.stop != nil { + c.stop() + } + return nil +} diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index 3d7921d75..cbdd7799e 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -2751,6 +2751,11 @@ const ( // node is no longer online, but that is not a reliable signal. NodeAttrClientSideReachability = "client-side-reachability" + // NodeAttrClientSideReachabilityRouteCheck configures the node to use + // the routecheck subsystem to determine reachability when choosing + // connectors. This relies on [NodeAttrClientSideReachability] being set. + NodeAttrClientSideReachabilityRouteCheck = "client-side-reachability:routecheck" + // NodeAttrDefaultAutoUpdate advertises the default node auto-update setting // for this tailnet. The node is free to opt-in or out locally regardless of // this value. 
Once this has been set and stored in the client, future diff --git a/tsnet/depaware.txt b/tsnet/depaware.txt index d71152865..89c3ca167 100644 --- a/tsnet/depaware.txt +++ b/tsnet/depaware.txt @@ -241,7 +241,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware) tailscale.com/ipn/ipnlocal/netmapcache from tailscale.com/ipn/ipnlocal tailscale.com/ipn/ipnstate from tailscale.com/client/local+ tailscale.com/ipn/localapi from tailscale.com/tsnet - tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck+ + tailscale.com/ipn/routecheck from tailscale.com/client/local+ tailscale.com/ipn/store from tailscale.com/ipn/ipnlocal+ tailscale.com/ipn/store/mem from tailscale.com/ipn/ipnlocal+ tailscale.com/kube/kubetypes from tailscale.com/envknob