mirror of
https://github.com/tailscale/tailscale.git
synced 2026-05-05 04:06:35 +02:00
ipn/ipnlocal: use routecheck reports to make exit node suggestions
Now that the routecheck subsystem is continuously collecting reachability reports in the background, we can add a hook to LocalBackend for fetching its report. That allows suggestExitNodeUsingTrafficSteering to consult that report when disqualifying candidates. Exit node suggestions will only consult the report when the `client-side-reachability` and `client-side-reachability:routecheck` node attributes are both set on the current node. Updates #17366 Updates tailscale/corp#33033 Signed-off-by: Simon Law <sfllaw@tailscale.com>
This commit is contained in:
parent
905141e4ca
commit
73f52284f6
@ -12,6 +12,7 @@ import (
|
||||
"fmt"
|
||||
|
||||
"tailscale.com/ipn/ipnext"
|
||||
"tailscale.com/ipn/ipnlocal"
|
||||
"tailscale.com/ipn/routecheck"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/logger"
|
||||
@ -28,6 +29,8 @@ func init() {
|
||||
backend: b,
|
||||
}, nil
|
||||
})
|
||||
|
||||
ipnlocal.HookRouteCheckReport.Set(routeCheckReport)
|
||||
}
|
||||
|
||||
// Extension implements the [ipnext.Extension] interface.
|
||||
@ -91,3 +94,7 @@ func (e *Extension) needsRefresh() {
|
||||
}
|
||||
e.Client.NeedsRefresh()
|
||||
}
|
||||
|
||||
func routeCheckReport(b *ipnlocal.LocalBackend) ipnlocal.RouteCheckReport {
|
||||
return ClientFor(b).Report()
|
||||
}
|
||||
|
||||
@ -7573,6 +7573,9 @@ func allowedAutoRoute(ipp netip.Prefix) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// HookRouteCheckReport is the hook that returns the latest routecheck Report for this LocalBackend.
|
||||
var HookRouteCheckReport feature.Hook[func(*LocalBackend) RouteCheckReport]
|
||||
|
||||
var ErrNoPreferredDERP = errors.New("no preferred DERP, try again later")
|
||||
|
||||
// suggestExitNodeLocked computes a suggestion based on the current netmap and
|
||||
@ -7588,7 +7591,12 @@ func (b *LocalBackend) suggestExitNodeLocked() (response apitype.ExitNodeSuggest
|
||||
lastReport := b.MagicConn().GetLastNetcheckReport(b.ctx)
|
||||
prevSuggestion := b.lastSuggestedExitNode
|
||||
|
||||
res, err := suggestExitNode(lastReport, b.currentNode(), prevSuggestion, randomRegion, randomNode, b.getAllowedSuggestions())
|
||||
var rp RouteCheckReport
|
||||
if HookRouteCheckReport.IsSet() {
|
||||
hook := HookRouteCheckReport.Get()
|
||||
rp = hook(b)
|
||||
}
|
||||
res, err := suggestExitNode(lastReport, rp, b.currentNode(), prevSuggestion, randomRegion, randomNode, b.getAllowedSuggestions())
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
@ -7660,11 +7668,11 @@ func fillAllowedSuggestions(polc policyclient.Client) (set.Set[tailcfg.StableNod
|
||||
|
||||
// suggestExitNode returns a suggestion for reasonably good exit node based on
|
||||
// the current netmap and the previous suggestion.
|
||||
func suggestExitNode(report *netcheck.Report, nb *nodeBackend, prevSuggestion tailcfg.StableNodeID, selectRegion selectRegionFunc, selectNode selectNodeFunc, allowList set.Set[tailcfg.StableNodeID]) (res apitype.ExitNodeSuggestionResponse, err error) {
|
||||
func suggestExitNode(report *netcheck.Report, rp RouteCheckReport, nb *nodeBackend, prevSuggestion tailcfg.StableNodeID, selectRegion selectRegionFunc, selectNode selectNodeFunc, allowList set.Set[tailcfg.StableNodeID]) (res apitype.ExitNodeSuggestionResponse, err error) {
|
||||
switch {
|
||||
case nb.SelfHasCap(tailcfg.NodeAttrTrafficSteering):
|
||||
// The traffic-steering feature flag is enabled on this tailnet.
|
||||
res, err = suggestExitNodeUsingTrafficSteering(nb, allowList)
|
||||
res, err = suggestExitNodeUsingTrafficSteering(rp, nb, allowList)
|
||||
default:
|
||||
// The control plane will always strip the `traffic-steering`
|
||||
// node attribute if it isn’t enabled for this tailnet, even if
|
||||
@ -7694,10 +7702,6 @@ func suggestExitNode(report *netcheck.Report, nb *nodeBackend, prevSuggestion ta
|
||||
// the lowest latency to this device. For peers without a DERP home, we look for
|
||||
// geographic proximity to this device's DERP home.
|
||||
func suggestExitNodeUsingDERP(report *netcheck.Report, nb *nodeBackend, prevSuggestion tailcfg.StableNodeID, selectRegion selectRegionFunc, selectNode selectNodeFunc, allowList set.Set[tailcfg.StableNodeID]) (res apitype.ExitNodeSuggestionResponse, err error) {
|
||||
// TODO(sfllaw): Context needs to be plumbed down here to support
|
||||
// reachability testing.
|
||||
ctx := context.TODO()
|
||||
|
||||
netMap := nb.NetMap()
|
||||
if report == nil || report.PreferredDERP == 0 || netMap == nil || netMap.DERPMap == nil {
|
||||
return res, ErrNoPreferredDERP
|
||||
@ -7706,7 +7710,7 @@ func suggestExitNodeUsingDERP(report *netcheck.Report, nb *nodeBackend, prevSugg
|
||||
// since the netmap doesn't include delta updates (e.g., home DERP or Online
|
||||
// status changes) from the control plane since the last full update.
|
||||
candidates := nb.AppendMatchingPeers(nil, func(peer tailcfg.NodeView) bool {
|
||||
if !peer.Valid() || !nb.PeerIsReachable(ctx, peer) {
|
||||
if !peer.Valid() || !nb.PeerIsReachable(nil, peer) {
|
||||
return false
|
||||
}
|
||||
if allowList != nil && !allowList.Contains(peer.StableID()) {
|
||||
@ -7834,11 +7838,7 @@ var ErrNoNetMap = errors.New("no network map, try again later")
|
||||
// pick one of the best exit nodes. These priorities are provided by Control in
|
||||
// the node’s [tailcfg.Location]. To be eligible for consideration, the node
|
||||
// must have NodeAttrSuggestExitNode in its CapMap.
|
||||
func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, allowed set.Set[tailcfg.StableNodeID]) (apitype.ExitNodeSuggestionResponse, error) {
|
||||
// TODO(sfllaw): Context needs to be plumbed down here to support
|
||||
// reachability testing.
|
||||
ctx := context.TODO()
|
||||
|
||||
func suggestExitNodeUsingTrafficSteering(rp RouteCheckReport, nb *nodeBackend, allowed set.Set[tailcfg.StableNodeID]) (apitype.ExitNodeSuggestionResponse, error) {
|
||||
nm := nb.NetMap()
|
||||
if nm == nil {
|
||||
return apitype.ExitNodeSuggestionResponse{}, ErrNoNetMap
|
||||
@ -7857,7 +7857,7 @@ func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, allowed set.Set[tailcf
|
||||
if !p.Valid() {
|
||||
return false
|
||||
}
|
||||
if !nb.PeerIsReachable(ctx, p) {
|
||||
if !nb.PeerIsReachable(rp, p) {
|
||||
return false
|
||||
}
|
||||
if allowed != nil && !allowed.Contains(p.StableID()) {
|
||||
|
||||
@ -5391,7 +5391,7 @@ func TestSuggestExitNode(t *testing.T) {
|
||||
defer nb.shutdown(errShutdown)
|
||||
nb.SetNetMap(tt.netMap)
|
||||
|
||||
got, err := suggestExitNode(tt.lastReport, nb, tt.lastSuggestion, selectRegion, selectNode, allowList)
|
||||
got, err := suggestExitNode(tt.lastReport, nil, nb, tt.lastSuggestion, selectRegion, selectNode, allowList)
|
||||
if got.Name != tt.wantName {
|
||||
t.Errorf("name=%v, want %v", got.Name, tt.wantName)
|
||||
}
|
||||
@ -5865,7 +5865,7 @@ func TestSuggestExitNodeTrafficSteering(t *testing.T) {
|
||||
defer nb.shutdown(errShutdown)
|
||||
nb.SetNetMap(tt.netMap)
|
||||
|
||||
got, err := suggestExitNodeUsingTrafficSteering(nb, allowList)
|
||||
got, err := suggestExitNodeUsingTrafficSteering(nil, nb, allowList)
|
||||
if tt.wantErr == nil && err != nil {
|
||||
t.Fatalf("err=%v, want nil", err)
|
||||
}
|
||||
|
||||
@ -352,9 +352,16 @@ func (nb *nodeBackend) PeerAPIBase(p tailcfg.NodeView) string {
|
||||
return peerAPIBase(nm, p)
|
||||
}
|
||||
|
||||
// RouteCheckReport is an interface that reports whether a peer is reachable by the current node.
|
||||
type RouteCheckReport interface {
|
||||
// IsReachable reports whether a peer is reachable by the current node
|
||||
// or if it is unknown because it has yet to be probed.
|
||||
IsReachable(tailcfg.NodeID) (ok, known bool)
|
||||
}
|
||||
|
||||
// PeerIsReachable reports whether the current node can reach p. If the ctx is
|
||||
// done, this function may return a result based on stale reachability data.
|
||||
func (nb *nodeBackend) PeerIsReachable(ctx context.Context, p tailcfg.NodeView) bool {
|
||||
func (nb *nodeBackend) PeerIsReachable(rp RouteCheckReport, p tailcfg.NodeView) bool {
|
||||
if !nb.SelfHasCap(tailcfg.NodeAttrClientSideReachability) {
|
||||
// Legacy behavior is to always trust the control plane, which
|
||||
// isn’t always correct because the peer could be slow to check
|
||||
@ -371,19 +378,28 @@ func (nb *nodeBackend) PeerIsReachable(ctx context.Context, p tailcfg.NodeView)
|
||||
// This node can always reach itself.
|
||||
return true
|
||||
}
|
||||
return nb.peerIsReachable(ctx, p)
|
||||
}
|
||||
|
||||
func (nb *nodeBackend) peerIsReachable(ctx context.Context, p tailcfg.NodeView) bool {
|
||||
// TODO(sfllaw): The following does not actually test for client-side
|
||||
// reachability. This would require a mechanism that tracks whether the
|
||||
// current node can actually reach this peer, either because they are
|
||||
// already communicating or because they can ping each other.
|
||||
//
|
||||
// Instead, it makes the client ignore p.Online completely.
|
||||
//
|
||||
// See tailscale/corp#32686.
|
||||
return true
|
||||
if !nb.SelfHasCap(tailcfg.NodeAttrClientSideReachabilityRouteCheck) {
|
||||
// TODO(sfllaw): The following does not actually test for client-side
|
||||
// reachability. This would require a mechanism that tracks whether the
|
||||
// current node can actually reach this peer, either because they are
|
||||
// already communicating or because they can ping each other.
|
||||
//
|
||||
// Instead, it makes the client ignore p.Online completely.
|
||||
//
|
||||
// See tailscale/corp#32686.
|
||||
return true
|
||||
}
|
||||
|
||||
if rp == nil {
|
||||
return p.Online().Get() // Fallback
|
||||
}
|
||||
ok, known := rp.IsReachable(p.ID())
|
||||
if !known {
|
||||
// Reachability is unknown, because the node is a new router, so fall back.
|
||||
return p.Online().Get()
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
func nodeIP(n tailcfg.NodeView, pred func(netip.Addr) bool) netip.Addr {
|
||||
|
||||
@ -182,7 +182,7 @@ func TestNodeBackendReachability(t *testing.T) {
|
||||
nb.netMap.AllCaps.Add(tailcfg.NodeAttrClientSideReachability)
|
||||
}
|
||||
|
||||
got := nb.PeerIsReachable(t.Context(), tc.peer.View())
|
||||
got := nb.PeerIsReachable(nil, tc.peer.View())
|
||||
if got != tc.want {
|
||||
t.Errorf("got %v, want %v", got, tc.want)
|
||||
}
|
||||
|
||||
@ -30,6 +30,17 @@ type Report struct {
|
||||
Reachable nodeset `json:"reachable"`
|
||||
}
|
||||
|
||||
// IsReachable reports whether a peer is reachable by the current node
|
||||
// or if it is unknown because it has yet to be probed.
|
||||
func (rp Report) IsReachable(id tailcfg.NodeID) (ok, known bool) {
|
||||
// TODO(sfllaw): We should actually track all routers and consider the
|
||||
// absence of a router in the report as it being recently added for
|
||||
// consideration, so it is unknown. Then we should positively track
|
||||
// whether a node was reachable or not.
|
||||
_, k := rp.Reachable[id]
|
||||
return k, k
|
||||
}
|
||||
|
||||
// RoutablePrefixes returns a [RoutingTable] mapping routable network prefixes
|
||||
// with the associated routers that were reachable by the current host,
|
||||
// at the time the report was finished.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user