From 17ca2ece2ba80f7019e4df0a0be5abfd9724e129 Mon Sep 17 00:00:00 2001 From: Andrea Gottardo Date: Mon, 17 Jun 2024 14:58:42 -0700 Subject: [PATCH] health: introduce captive-portal-detected Warnable Updates tailscale/tailscale#1634 This PR introduces a new `captive-portal-detected` Warnable which is set to an unhealthy state whenever a captive portal is detected on the local network, preventing Tailscale from connecting. Signed-off-by: Andrea Gottardo --- cmd/k8s-operator/depaware.txt | 1 + cmd/tailscale/depaware.txt | 3 +- cmd/tailscaled/depaware.txt | 1 + control/controlknobs/controlknobs.go | 7 + ipn/ipnlocal/local.go | 84 +++++++ net/captivedetection/captivedetection.go | 218 ++++++++++++++++++ net/captivedetection/captivedetection_test.go | 101 ++++++++ net/captivedetection/endpoints.go | 213 +++++++++++++++++ net/captivedetection/rawconn.go | 19 ++ net/captivedetection/rawconn_apple.go | 24 ++ net/dnsfallback/dnsfallback.go | 11 +- net/dnsfallback/dnsfallback_test.go | 4 +- net/netcheck/netcheck.go | 79 +------ net/netcheck/netcheck_test.go | 50 ---- tailcfg/tailcfg.go | 7 +- 15 files changed, 688 insertions(+), 134 deletions(-) create mode 100644 net/captivedetection/captivedetection.go create mode 100644 net/captivedetection/captivedetection_test.go create mode 100644 net/captivedetection/endpoints.go create mode 100644 net/captivedetection/rawconn.go create mode 100644 net/captivedetection/rawconn_apple.go diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index b5c0ed517..547f2ec57 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -701,6 +701,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ tailscale.com/logtail/backoff from tailscale.com/control/controlclient+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/metrics from tailscale.com/derp+ + tailscale.com/net/captivedetection from tailscale.com/ipn/ipnlocal+ 
tailscale.com/net/connstats from tailscale.com/net/tstun+ tailscale.com/net/dns from tailscale.com/ipn/ipnlocal+ tailscale.com/net/dns/publicdns from tailscale.com/net/dns+ diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index 80b011d04..c03be655d 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -100,9 +100,10 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/ipn/ipnstate from tailscale.com/client/tailscale+ tailscale.com/licenses from tailscale.com/client/web+ tailscale.com/metrics from tailscale.com/derp + tailscale.com/net/captivedetection from tailscale.com/net/netcheck tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback tailscale.com/net/dnscache from tailscale.com/control/controlhttp+ - tailscale.com/net/dnsfallback from tailscale.com/control/controlhttp + tailscale.com/net/dnsfallback from tailscale.com/control/controlhttp+ tailscale.com/net/flowtrack from tailscale.com/net/packet tailscale.com/net/netaddr from tailscale.com/ipn+ tailscale.com/net/netcheck from tailscale.com/cmd/tailscale/cli diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 5b37778f8..5512e9eff 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -288,6 +288,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/logtail/backoff from tailscale.com/cmd/tailscaled+ tailscale.com/logtail/filch from tailscale.com/log/sockstatlog+ tailscale.com/metrics from tailscale.com/derp+ + tailscale.com/net/captivedetection from tailscale.com/ipn/ipnlocal+ tailscale.com/net/connstats from tailscale.com/net/tstun+ tailscale.com/net/dns from tailscale.com/cmd/tailscaled+ tailscale.com/net/dns/publicdns from tailscale.com/net/dns+ diff --git a/control/controlknobs/controlknobs.go b/control/controlknobs/controlknobs.go index 7315a10f7..dd76a3abd 100644 --- a/control/controlknobs/controlknobs.go +++ 
b/control/controlknobs/controlknobs.go @@ -99,6 +99,10 @@ type Knobs struct { // DisableCryptorouting indicates that the node should not use the // magicsock crypto routing feature. DisableCryptorouting atomic.Bool + + // DisableCaptivePortalDetection is whether the node should not perform captive portal detection + // automatically when the network state changes. + DisableCaptivePortalDetection atomic.Bool } // UpdateFromNodeAttributes updates k (if non-nil) based on the provided self @@ -127,6 +131,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) { disableSplitDNSWhenNoCustomResolvers = has(tailcfg.NodeAttrDisableSplitDNSWhenNoCustomResolvers) disableLocalDNSOverrideViaNRPT = has(tailcfg.NodeAttrDisableLocalDNSOverrideViaNRPT) disableCryptorouting = has(tailcfg.NodeAttrDisableMagicSockCryptoRouting) + disableCaptivePortalDetection = has(tailcfg.NodeAttrDisableCaptivePortalDetection) ) if has(tailcfg.NodeAttrOneCGNATEnable) { @@ -153,6 +158,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) { k.DisableSplitDNSWhenNoCustomResolvers.Store(disableSplitDNSWhenNoCustomResolvers) k.DisableLocalDNSOverrideViaNRPT.Store(disableLocalDNSOverrideViaNRPT) k.DisableCryptorouting.Store(disableCryptorouting) + k.DisableCaptivePortalDetection.Store(disableCaptivePortalDetection) } // AsDebugJSON returns k as something that can be marshalled with json.Marshal @@ -180,5 +186,6 @@ func (k *Knobs) AsDebugJSON() map[string]any { "DisableSplitDNSWhenNoCustomResolvers": k.DisableSplitDNSWhenNoCustomResolvers.Load(), "DisableLocalDNSOverrideViaNRPT": k.DisableLocalDNSOverrideViaNRPT.Load(), "DisableCryptorouting": k.DisableCryptorouting.Load(), + "DisableCaptivePortalDetection": k.DisableCaptivePortalDetection.Load(), } } diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index ede29156b..446b4a976 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -60,6 +60,7 @@ import ( "tailscale.com/ipn/policy" 
"tailscale.com/log/sockstatlog" "tailscale.com/logpolicy" + "tailscale.com/net/captivedetection" "tailscale.com/net/dns" "tailscale.com/net/dnscache" "tailscale.com/net/dnsfallback" @@ -344,6 +345,11 @@ type LocalBackend struct { // refreshAutoExitNode indicates if the exit node should be recomputed when the next netcheck report is available. refreshAutoExitNode bool + + // captiveDetectionTimer is a timer acting as debouncer to trigger captive portal detection + // upon a lack of Internet connectivity, avoiding spurious detection attempts. + // It is always nil unless a captive portal detection attempt is pending. + captiveDetectionTimer *time.Timer } // HealthTracker returns the health tracker for the backend. @@ -669,6 +675,10 @@ func (b *LocalBackend) pauseOrResumeControlClientLocked() { b.cc.SetPaused((b.state == ipn.Stopped && b.netMap != nil) || (!networkUp && !testenv.InTest() && !assumeNetworkUpdateForTest())) } +// captivePortalDetectionInterval is the duration to wait in an unhealthy state with connectivity broken +// before running captive portal detection. +const captivePortalDetectionInterval = 2 * time.Second + // linkChange is our network monitor callback, called whenever the network changes. 
func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) { b.mu.Lock() @@ -719,6 +729,30 @@ func (b *LocalBackend) onHealthChange(w *health.Warnable, us *health.UnhealthySt b.send(ipn.Notify{ Health: state, }) + + isConnectivityImpacted := false + for _, w := range state.Warnings { + if w.ImpactsConnectivity { + isConnectivityImpacted = true + break + } + } + + if isConnectivityImpacted { + b.logf("health: connectivity impacted; triggering captive portal detection in %v", captivePortalDetectionInterval) + b.mu.Lock() + if b.captiveDetectionTimer != nil { + b.captiveDetectionTimer.Reset(captivePortalDetectionInterval) + } else { + b.captiveDetectionTimer = time.AfterFunc(captivePortalDetectionInterval, func() { + b.mu.Lock() + b.captiveDetectionTimer = nil + b.mu.Unlock() + b.performCaptiveDetection() + }) + } + b.mu.Unlock() + } } // Shutdown halts the backend and all its sub-components. The backend @@ -2097,6 +2131,52 @@ func (b *LocalBackend) updateFilterLocked(netMap *netmap.NetworkMap, prefs ipn.P } } +// captivePortalWarnable is a Warnable which is set to an unhealthy state when a captive portal is detected. +var captivePortalWarnable = health.Register(&health.Warnable{ + Code: "captive-portal-detected", + Title: "Captive portal detected", + // High severity, because captive portals block all traffic and require user intervention. + Severity: health.SeverityHigh, + Text: health.StaticMessage("This network requires you to log in using your web browser."), + ImpactsConnectivity: true, +}) + +// performCaptiveDetection checks if captive portal detection is enabled via controlknob. If so, it runs +// the detection and updates the Warnable accordingly. 
+func (b *LocalBackend) performCaptiveDetection() { + captiveDetectionDisabledByControlKnob := b.ControlKnobs().DisableCaptivePortalDetection.Load() + if captiveDetectionDisabledByControlKnob { + b.logf("performCaptiveDetection: disabled by controlknob") + return + } + + // Only perform detection if ipn.State is Running. + if b.State() != ipn.Running { + b.logf("performCaptiveDetection: ignored because not running") + return + } + + d := captivedetection.NewDetector(b.logf) + var dm *tailcfg.DERPMap + b.mu.Lock() + if b.netMap != nil { + dm = b.netMap.DERPMap + } + preferredDERP := 0 + if b.hostinfo != nil { + if b.hostinfo.NetInfo != nil { + preferredDERP = b.hostinfo.NetInfo.PreferredDERP + } + } + b.mu.Unlock() + found := d.Detect(b.ctx, b.NetMon(), dm, preferredDERP) + if found { + b.health.SetUnhealthy(captivePortalWarnable, health.Args{}) + } else { + b.health.SetHealthy(captivePortalWarnable) + } +} + // packetFilterPermitsUnlockedNodes reports any peer in peers with the // UnsignedPeerAPIOnly bool set true has any of its allowed IPs in the packet // filter. @@ -4493,6 +4573,10 @@ func (b *LocalBackend) enterStateLockedOnEntry(newState ipn.State, unlock unlock } else if oldState == ipn.Running { // Transitioning away from running. b.closePeerAPIListenersLocked() + if b.captiveDetectionTimer != nil { + b.captiveDetectionTimer.Stop() + b.captiveDetectionTimer = nil + } } b.pauseOrResumeControlClientLocked() diff --git a/net/captivedetection/captivedetection.go b/net/captivedetection/captivedetection.go new file mode 100644 index 000000000..567f400f5 --- /dev/null +++ b/net/captivedetection/captivedetection.go @@ -0,0 +1,218 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +// Package captivedetection provides a way to detect if the system is connected to a network that has +// a captive portal. 
It does this by making HTTP requests to known captive portal detection endpoints +// and checking if the HTTP responses indicate that a captive portal might be present. +package captivedetection + +import ( + "context" + "net" + "net/http" + "runtime" + "strings" + "sync" + "syscall" + "time" + + "tailscale.com/net/netmon" + "tailscale.com/tailcfg" + "tailscale.com/types/logger" +) + +// Detector checks whether the system is behind a captive portal. +type Detector struct { + // mu is a mutex used to protect httpClient and currIfIndex from concurrent access. + // It is needed because we issue requests concurrently to different endpoints, using + // a shared httpClient. + mu sync.Mutex + // httpClient is the HTTP client that is used for captive portal detection. It is configured + // to not follow redirects, have a short timeout and no keep-alive. + httpClient *http.Client + // currIfIndex is the index of the interface that is currently being used by the httpClient. + currIfIndex int + // logf is the logger used for logging messages. It must not be nil; it is called without a nil check. + logf logger.Logf +} + +// NewDetector creates a new Detector instance for captive portal detection. +func NewDetector(logf logger.Logf) *Detector { + d := &Detector{logf: logf} + d.httpClient = &http.Client{ + // No redirects allowed + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + }, + Transport: &http.Transport{ + DialContext: d.dialContext, + DisableKeepAlives: true, + }, + Timeout: Timeout, + } + return d +} + +// Timeout is the timeout for captive portal detection requests. Because the captive portal intercepting our requests +// is usually located on the LAN, this is a relatively short timeout. +const Timeout = 3 * time.Second + +// Detect is the entry point to the API. It attempts to detect if the system is behind a captive portal +// by making HTTP requests to known captive portal detection Endpoints.
If any of the requests return a response code +// or body that looks like a captive portal, Detect returns true. It returns false in all other cases, including when any +// error occurs during a detection attempt. +// +// This function might take a while to return, as it will attempt to detect a captive portal on all available interfaces +// by performing multiple HTTP requests. It should be called in a separate goroutine if you want to avoid blocking. +func (d *Detector) Detect(ctx context.Context, netMon *netmon.Monitor, derpMap *tailcfg.DERPMap, preferredDERPRegionID int) (found bool) { + return d.detectCaptivePortalWithGOOS(ctx, netMon, derpMap, preferredDERPRegionID, runtime.GOOS) +} + +func (d *Detector) detectCaptivePortalWithGOOS(ctx context.Context, netMon *netmon.Monitor, derpMap *tailcfg.DERPMap, preferredDERPRegionID int, goos string) (found bool) { + ifState := netMon.InterfaceState() + if !ifState.AnyInterfaceUp() { + d.logf("[v2] DetectCaptivePortal: no interfaces up, returning false") + return false + } + + endpoints := availableEndpoints(derpMap, preferredDERPRegionID, d.logf, goos) + + // Here we try detecting a captive portal using *all* available interfaces on the system + // that have at least one address. We consider a captive portal to be found when any interface + // reports that one may exist. This is necessary because most systems have multiple interfaces, + // and most importantly on macOS no default route interface is set until the user has accepted + // the captive portal alert thrown by the system. If no default route interface is known, + // we need to try with anything that might remotely resemble a Wi-Fi interface.
+ for ifName, i := range ifState.Interface { + if !i.IsUp() || i.IsLoopback() || interfaceNameDoesNotNeedCaptiveDetection(ifName, goos) { + continue + } + addrs, err := i.Addrs() + if err != nil { + d.logf("[v1] DetectCaptivePortal: failed to get addresses for interface %s: %v", ifName, err) + continue + } + if len(addrs) == 0 { + continue + } + d.logf("[v2] attempting to do captive portal detection on interface %s", ifName) + res := d.detectOnInterface(ctx, i.Index, endpoints, netMon) + if res { + d.logf("DetectCaptivePortal(found=true,ifName=%s)", ifName) + return true + } + } + + d.logf("DetectCaptivePortal(found=false)") + return false +} + +func interfaceNameDoesNotNeedCaptiveDetection(ifName string, goos string) bool { + ifName = strings.ToLower(ifName) + excludedPrefixes := []string{"tailscale", "tun", "tap", "docker", "kube", "wg"} + if goos == "windows" { + excludedPrefixes = append(excludedPrefixes, "loopback", "tunnel", "ppp", "isatap", "teredo", "6to4") + } else if goos == "darwin" || goos == "ios" { + excludedPrefixes = append(excludedPrefixes, "awdl", "bridge", "ap", "utun", "tap", "llw", "anpi", "lo", "stf", "gif", "xhc") + } + for _, prefix := range excludedPrefixes { + if strings.HasPrefix(ifName, prefix) { + return true + } + } + return false +} + +// detectOnInterface reports whether or not we think the system is behind a +// captive portal, detected by making a request to a URL that we know should +// return a "204 No Content" response and checking if that's what we get. +// +// The boolean return is whether we think we have a captive portal. +func (d *Detector) detectOnInterface(ctx context.Context, ifIndex int, endpoints []Endpoint, netMon *netmon.Monitor) bool { + defer d.httpClient.CloseIdleConnections() + + d.logf("[v2] %d available captive portal detection endpoints: %v", len(endpoints), endpoints) + + // We try to detect the captive portal more quickly by making requests to multiple endpoints concurrently.
+ var wg sync.WaitGroup + resultCh := make(chan bool, len(endpoints)) + + for i, e := range endpoints { + if i >= 5 { + // Try a maximum of 5 endpoints, break out (returning false) if we run out of attempts. + break + } + wg.Add(1) + go func(endpoint Endpoint) { + defer wg.Done() + found, err := d.verifyCaptivePortalEndpoint(ctx, endpoint, ifIndex) + if err != nil { + d.logf("[v1] checkCaptivePortalEndpoint failed with endpoint %v: %v", endpoint, err) + return + } + if found { + resultCh <- true + } + }(e) + } + + go func() { + wg.Wait() + close(resultCh) + }() + + for result := range resultCh { + if result { + // If any of the endpoints seems to be a captive portal, we consider the system to be behind one. + return true + } + } + + return false +} + +// verifyCaptivePortalEndpoint checks if the given Endpoint is a captive portal by making an HTTP request to the +// given Endpoint URL using the interface with index ifIndex, and checking if the response looks like a captive portal. +func (d *Detector) verifyCaptivePortalEndpoint(ctx context.Context, e Endpoint, ifIndex int) (found bool, err error) { + req, err := http.NewRequestWithContext(ctx, "GET", e.URL.String(), nil) + if err != nil { + return false, err + } + + // Attach the Tailscale challenge header if the endpoint supports it. Not all captive portal detection endpoints + // support this, so we only attach it if the endpoint does. + if e.SupportsTailscaleChallenge { + // Note: the set of valid characters in a challenge and the total + // length is limited; see isChallengeChar in cmd/derper for more + // details. + chal := "ts_" + e.URL.Host + req.Header.Set("X-Tailscale-Challenge", chal) + } + + d.mu.Lock() + d.currIfIndex = ifIndex + d.mu.Unlock() + + // Make the actual request, and check if the response looks like a captive portal or not.
+ r, err := d.httpClient.Do(req) + if err != nil { + return false, err + } + + return e.responseLooksLikeCaptive(r, d.logf), nil +} + +func (d *Detector) dialContext(ctx context.Context, network, addr string) (net.Conn, error) { + d.mu.Lock() + ifIndex := d.currIfIndex + d.mu.Unlock() + + // Dial without holding mu: keeping it locked across DialContext would serialize the concurrent endpoint requests. + dl := net.Dialer{ + Control: func(network, address string, c syscall.RawConn) error { + return setSocketInterfaceIndex(c, ifIndex, d.logf) + }, + } + + return dl.DialContext(ctx, network, addr) +} diff --git a/net/captivedetection/captivedetection_test.go b/net/captivedetection/captivedetection_test.go new file mode 100644 index 000000000..14fd3aef8 --- /dev/null +++ b/net/captivedetection/captivedetection_test.go @@ -0,0 +1,101 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package captivedetection + +import ( + "context" + "net/http" + "net/url" + "runtime" + "sync" + "testing" + + "tailscale.com/net/netmon" +) + +func TestAvailableEndpointsAlwaysAtLeastTwo(t *testing.T) { + endpoints := availableEndpoints(nil, 0, t.Logf, runtime.GOOS) + if len(endpoints) == 0 { + t.Errorf("Expected non-empty AvailableEndpoints, got an empty slice instead") + } + if len(endpoints) == 1 { + t.Errorf("Expected at least two AvailableEndpoints for redundancy, got only one instead") + } + for _, e := range endpoints { + if e.URL.Scheme != "http" { + t.Errorf("Expected HTTP URL in Endpoint, got HTTPS") + } + } +} + +func TestAvailableEndpointsUsesAppleOnDarwin(t *testing.T) { + darwinOK := false + iosOK := false + for _, os := range []string{"darwin", "ios"} { + endpoints := availableEndpoints(nil, 0, t.Logf, os) + if len(endpoints) == 0 { + t.Errorf("Expected non-empty AvailableEndpoints, got an empty slice instead") + } + u, _ := url.Parse("http://captive.apple.com/hotspot-detect.html") + want := Endpoint{u, http.StatusOK, "Success", false, Platform} + for _, e := range endpoints { + if e.Equal(want) { + if os == "darwin" { + darwinOK = true + }
else if os == "ios" { + iosOK = true + } + } + } + } + + if !darwinOK || !iosOK { + t.Errorf("Expected to find Apple captive portal detection URL on both Darwin and iOS, but didn't") + } +} + +func TestAvailableEndpointsUsesMSFTOnWindows(t *testing.T) { + endpoints := availableEndpoints(nil, 0, t.Logf, "windows") + if len(endpoints) == 0 { + t.Errorf("Expected non-empty AvailableEndpoints, got an empty slice instead") + } + u, _ := url.Parse("http://www.msftconnecttest.com/connecttest.txt") + want := Endpoint{u, http.StatusOK, "Microsoft Connect Test", false, Platform} + for _, e := range endpoints { + if e.Equal(want) { + return + } + } + t.Errorf("Expected to find Microsoft captive portal detection URL on Windows, but didn't") +} + +func TestDetectCaptivePortalReturnsFalse(t *testing.T) { + d := NewDetector(t.Logf) + found := d.Detect(context.Background(), netmon.NewStatic(), nil, 0) + if found { + t.Errorf("DetectCaptivePortal returned true, expected false.") + } +} + +func TestAllEndpointsAreUpAndReturnExpectedResponse(t *testing.T) { + d := NewDetector(t.Logf) + endpoints := availableEndpoints(nil, 0, t.Logf, runtime.GOOS) + + var wg sync.WaitGroup + for _, e := range endpoints { + wg.Add(1) + go func(endpoint Endpoint) { + defer wg.Done() + found, err := d.verifyCaptivePortalEndpoint(context.Background(), endpoint, 0) + if err != nil { + t.Errorf("verifyCaptivePortalEndpoint failed with endpoint %v: %v", endpoint, err) + } + if found { + t.Errorf("verifyCaptivePortalEndpoint with endpoint %v says we're behind a captive portal, but we aren't", endpoint) + } + }(e) + } + + wg.Wait() +} diff --git a/net/captivedetection/endpoints.go b/net/captivedetection/endpoints.go new file mode 100644 index 000000000..fe136d3c4 --- /dev/null +++ b/net/captivedetection/endpoints.go @@ -0,0 +1,213 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package captivedetection + +import ( + "cmp" + "fmt" + "io" + "net/http" + "net/url" + 
"slices" + "strings" + + "tailscale.com/net/dnsfallback" + "tailscale.com/tailcfg" + "tailscale.com/types/logger" +) + +// EndpointProvider is an enum that represents the source of an Endpoint. +type EndpointProvider int + +const ( + // DERPMapPreferred is used for an endpoint that is a DERP node contained in the current preferred DERP region, + // as provided by the DERPMap. + DERPMapPreferred EndpointProvider = iota + // DERPMapOther is used for an endpoint that is a DERP node, but not contained in the current preferred DERP region. + DERPMapOther + // Platform is used for an endpoint that is a well-known captive portal detection URL for the current platform + // (operated by Apple, Microsoft, etc.) + Platform + // Tailscale is used for endpoints that are the Tailscale coordination server or admin console. + Tailscale +) + +func (p EndpointProvider) String() string { + switch p { + case DERPMapPreferred: + return "DERPMapPreferred" + case Tailscale: + return "Tailscale" + case Platform: + return "Platform" + case DERPMapOther: + return "DERPMapOther" + default: + return fmt.Sprintf("EndpointProvider(%d)", p) + } +} + +// Endpoint represents a URL that can be used to detect a captive portal, along with the expected +// result of the HTTP request. +type Endpoint struct { + // URL is the URL that we make an HTTP request to as part of the captive portal detection process. + URL *url.URL + // StatusCode is the expected HTTP status code that we expect to see in the response. + StatusCode int + // ExpectedContent is a string that we expect to see contained in the response body. If this is non-empty, + // we will check that the response body contains this string. If it is empty, we will not check the response body + // and only check the status code. + ExpectedContent string + // SupportsTailscaleChallenge is true if the endpoint will return the sent value of the X-Tailscale-Challenge + // HTTP header in its HTTP response. 
+ SupportsTailscaleChallenge bool + // Provider is the source of the endpoint. This is used to prioritize certain endpoints over others + // (for example, a DERP node in the preferred region should always be used first). + Provider EndpointProvider +} + +func (e Endpoint) String() string { + return fmt.Sprintf("Endpoint{URL=%q, StatusCode=%d, ExpectedContent=%q, SupportsTailscaleChallenge=%v, Provider=%s}", e.URL, e.StatusCode, e.ExpectedContent, e.SupportsTailscaleChallenge, e.Provider.String()) +} + +func (e Endpoint) Equal(other Endpoint) bool { + return e.URL.String() == other.URL.String() && + e.StatusCode == other.StatusCode && + e.ExpectedContent == other.ExpectedContent && + e.SupportsTailscaleChallenge == other.SupportsTailscaleChallenge && + e.Provider == other.Provider +} + +// availableEndpoints returns a set of Endpoints which can be used for captive portal detection by performing +// one or more HTTP requests and looking at the response. The returned Endpoints are ordered by preference, +// with the most preferred Endpoint being the first in the slice. +func availableEndpoints(derpMap *tailcfg.DERPMap, preferredDERPRegionID int, logf logger.Logf, goos string) []Endpoint { + endpoints := []Endpoint{} + + if derpMap == nil || len(derpMap.Regions) == 0 { + // When the client first starts, we don't have a DERPMap in LocalBackend yet. In this case, + // we use the static DERPMap from dnsfallback. + logf("captivedetection: current DERPMap is empty, using map from dnsfallback") + derpMap = dnsfallback.GetDERPMap() + } + // Use the DERP IPs as captive portal detection endpoints. Using IPs is better than hostnames + // because they do not depend on DNS resolution. 
+ for _, region := range derpMap.Regions { + if region.Avoid { + continue + } + for _, node := range region.Nodes { + if node.IPv4 == "" { + continue + } + str := "http://" + node.IPv4 + "/generate_204" + u, err := url.Parse(str) + if err != nil { + logf("captivedetection: failed to parse DERP node URL %q: %v", str, err) + continue + } + p := DERPMapOther + if region.RegionID == preferredDERPRegionID { + p = DERPMapPreferred + } + e := Endpoint{u, http.StatusNoContent, "", true, p} + endpoints = append(endpoints, e) + } + } + + // Let's also try the default Tailscale coordination server and admin console. + // These are likely to be blocked on some networks. + appendTailscaleEndpoint := func(urlString string) { + u, err := url.Parse(urlString) + if err != nil { + logf("captivedetection: failed to parse Tailscale URL %q: %v", urlString, err) + return + } + endpoints = append(endpoints, Endpoint{u, http.StatusNoContent, "", false, Tailscale}) + } + appendTailscaleEndpoint("http://controlplane.tailscale.com/generate_204") + appendTailscaleEndpoint("http://login.tailscale.com/generate_204") + + // Lastly, to be safe, let's also include some well-known captive portal detection URLs that are not under the + // tailscale.com umbrella. These are less likely to be blocked on public networks since blocking them + // would break captive portal detection for many devices. 
+ appendPlatformEndpoint := func(urlString string, statusCode int, expectedContent string) { + u, err := url.Parse(urlString) + if err != nil { + logf("captivedetection: failed to parse Platform URL %q: %v", urlString, err) + return + } + endpoints = append(endpoints, Endpoint{u, statusCode, expectedContent, false, Platform}) + } + + switch goos { + case "windows": + appendPlatformEndpoint("http://www.msftconnecttest.com/connecttest.txt", http.StatusOK, "Microsoft Connect Test") + appendPlatformEndpoint("http://www.msftncsi.com/ncsi.txt", http.StatusOK, "Microsoft NCSI") + case "darwin", "ios": + appendPlatformEndpoint("http://captive.apple.com/hotspot-detect.html", http.StatusOK, "Success") + appendPlatformEndpoint("http://www.thinkdifferent.us/", http.StatusOK, "Success") + appendPlatformEndpoint("http://www.airport.us/", http.StatusOK, "Success") + case "android": + appendPlatformEndpoint("http://connectivitycheck.android.com/generate_204", http.StatusNoContent, "") + appendPlatformEndpoint("http://connectivitycheck.gstatic.com/generate_204", http.StatusNoContent, "") + appendPlatformEndpoint("http://play.googleapis.com/generate_204", http.StatusNoContent, "") + appendPlatformEndpoint("http://clients3.google.com/generate_204", http.StatusNoContent, "") + default: + appendPlatformEndpoint("http://detectportal.firefox.com/success.txt", http.StatusOK, "success") + appendPlatformEndpoint("http://network-test.debian.org/nm", http.StatusOK, "NetworkManager is online") + } + + // Sort the endpoints by provider so that we can prioritize DERP nodes in the preferred region, followed by + // any other DERP server elsewhere, followed by Tailscale endpoints, and lastly any platform-specific endpoints. + slices.SortFunc(endpoints, func(x, y Endpoint) int { + return cmp.Compare(x.Provider, y.Provider) + }) + + return endpoints +} + +// responseLooksLikeCaptive checks if the given HTTP response matches the expected response for the Endpoint. 
+func (e Endpoint) responseLooksLikeCaptive(r *http.Response, logf logger.Logf) bool { + defer r.Body.Close() + + // Check the status code first. + if r.StatusCode != e.StatusCode { + logf("[v1] unexpected status code in captive portal response: want=%d, got=%d", e.StatusCode, r.StatusCode) + return true + } + + // If the endpoint supports the Tailscale challenge header, check that the response contains the expected header. + if e.SupportsTailscaleChallenge { + expectedResponse := "response ts_" + e.URL.Host + hasResponse := r.Header.Get("X-Tailscale-Response") == expectedResponse + if !hasResponse { + // The response did not contain the expected X-Tailscale-Response header, which means we are most likely + // behind a captive portal (somebody is tampering with the response headers). + logf("captive portal check response did not contain expected X-Tailscale-Response header: want=%q, got=%q", expectedResponse, r.Header.Get("X-Tailscale-Response")) + return true + } + } + + // If we don't have an expected content string, we don't need to check the response body. + if e.ExpectedContent == "" { + return false + } + + // Read the response body and check if it contains the expected content. + b, err := io.ReadAll(io.LimitReader(r.Body, 4096)) + if err != nil { + logf("reading captive portal check response body failed: %v", err) + return false + } + hasExpectedContent := strings.Contains(string(b), e.ExpectedContent) + if !hasExpectedContent { + // The response body did not contain the expected content, that means we are most likely behind a captive portal. + logf("[v1] captive portal check response body did not contain expected content: want=%q", e.ExpectedContent) + return true + } + + // If we got here, the response looks good. 
+ return false +} diff --git a/net/captivedetection/rawconn.go b/net/captivedetection/rawconn.go new file mode 100644 index 000000000..a7197d9df --- /dev/null +++ b/net/captivedetection/rawconn.go @@ -0,0 +1,19 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !(ios || darwin) + +package captivedetection + +import ( + "syscall" + + "tailscale.com/types/logger" +) + +// setSocketInterfaceIndex is the stub used on platforms other than iOS and macOS. +// It does not bind the socket to an interface: it ignores its arguments and always returns nil. +func setSocketInterfaceIndex(c syscall.RawConn, ifIndex int, logf logger.Logf) error { + // No-op on non-Darwin platforms. + return nil +} diff --git a/net/captivedetection/rawconn_apple.go b/net/captivedetection/rawconn_apple.go new file mode 100644 index 000000000..12b4446e6 --- /dev/null +++ b/net/captivedetection/rawconn_apple.go @@ -0,0 +1,24 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build ios || darwin + +package captivedetection + +import ( + "syscall" + + "golang.org/x/sys/unix" + "tailscale.com/types/logger" +) + +// setSocketInterfaceIndex sets the IP_BOUND_IF socket option on the given RawConn. +// This forces the socket to use the given interface.
+func setSocketInterfaceIndex(c syscall.RawConn, ifIndex int, logf logger.Logf) error { + return c.Control((func(fd uintptr) { + err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_BOUND_IF, ifIndex) + if err != nil { + logf("captivedetection: failed to set IP_BOUND_IF (ifIndex=%d): %v", ifIndex, err) + } + })) +} diff --git a/net/dnsfallback/dnsfallback.go b/net/dnsfallback/dnsfallback.go index 6b3ac864e..8322d3887 100644 --- a/net/dnsfallback/dnsfallback.go +++ b/net/dnsfallback/dnsfallback.go @@ -219,7 +219,7 @@ func lookup(ctx context.Context, host string, logf logger.Logf, ht *health.Track ip netip.Addr } - dm := getDERPMap() + dm := GetDERPMap() var cands4, cands6 []nameIP for _, dr := range dm.Regions { @@ -310,9 +310,12 @@ func bootstrapDNSMap(ctx context.Context, serverName string, serverIP netip.Addr // https://derp10.tailscale.com/bootstrap-dns type dnsMap map[string][]netip.Addr -// getDERPMap returns some DERP map. The DERP servers also run a fallback -// DNS server. -func getDERPMap() *tailcfg.DERPMap { +// GetDERPMap returns a fallback DERP map that is always available, useful for basic +// bootstrapping purposes. The dynamically updated DERP map in LocalBackend should +// always be preferred over this. Use this DERP map only when the control plane is +// unreachable or hasn't been reached yet. The DERP servers in the returned map also +// run a fallback DNS server. +func GetDERPMap() *tailcfg.DERPMap { dm := getStaticDERPMap() // Merge in any DERP servers from the cached map that aren't in the diff --git a/net/dnsfallback/dnsfallback_test.go b/net/dnsfallback/dnsfallback_test.go index 4298499b0..16f5027d4 100644 --- a/net/dnsfallback/dnsfallback_test.go +++ b/net/dnsfallback/dnsfallback_test.go @@ -18,7 +18,7 @@ import ( ) func TestGetDERPMap(t *testing.T) { - dm := getDERPMap() + dm := GetDERPMap() if dm == nil { t.Fatal("nil") } @@ -78,7 +78,7 @@ func TestCache(t *testing.T) { } // Verify that our DERP map is merged with the cache. 
- dm := getDERPMap() + dm := GetDERPMap() region, ok := dm.Regions[99] if !ok { t.Fatal("expected region 99") diff --git a/net/netcheck/netcheck.go b/net/netcheck/netcheck.go index 80957039e..8eb50a61d 100644 --- a/net/netcheck/netcheck.go +++ b/net/netcheck/netcheck.go @@ -14,13 +14,11 @@ import ( "io" "log" "maps" - "math/rand/v2" "net" "net/http" "net/netip" "runtime" "sort" - "strings" "sync" "syscall" "time" @@ -28,6 +26,7 @@ import ( "github.com/tcnksm/go-httpstat" "tailscale.com/derp/derphttp" "tailscale.com/envknob" + "tailscale.com/net/captivedetection" "tailscale.com/net/dnscache" "tailscale.com/net/neterror" "tailscale.com/net/netmon" @@ -847,11 +846,8 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap, opts *GetRe tmr := time.AfterFunc(c.captivePortalDelay(), func() { defer close(ch) - found, err := c.checkCaptivePortal(ctx, dm, preferredDERP) - if err != nil { - c.logf("[v1] checkCaptivePortal: %v", err) - return - } + d := captivedetection.NewDetector(c.logf) + found := d.Detect(ctx, c.NetMon, dm, preferredDERP) rs.report.CaptivePortal.Set(found) }) @@ -988,75 +984,6 @@ func (c *Client) finishAndStoreReport(rs *reportState, dm *tailcfg.DERPMap) *Rep return report } -var noRedirectClient = &http.Client{ - // No redirects allowed - CheckRedirect: func(req *http.Request, via []*http.Request) error { - return http.ErrUseLastResponse - }, - - // Remaining fields are the same as the default client. - Transport: http.DefaultClient.Transport, - Jar: http.DefaultClient.Jar, - Timeout: http.DefaultClient.Timeout, -} - -// checkCaptivePortal reports whether or not we think the system is behind a -// captive portal, detected by making a request to a URL that we know should -// return a "204 No Content" response and checking if that's what we get. -// -// The boolean return is whether we think we have a captive portal. 
-func (c *Client) checkCaptivePortal(ctx context.Context, dm *tailcfg.DERPMap, preferredDERP int) (bool, error) { - defer noRedirectClient.CloseIdleConnections() - - // If we have a preferred DERP region with more than one node, try - // that; otherwise, pick a random one not marked as "Avoid". - if preferredDERP == 0 || dm.Regions[preferredDERP] == nil || - (preferredDERP != 0 && len(dm.Regions[preferredDERP].Nodes) == 0) { - rids := make([]int, 0, len(dm.Regions)) - for id, reg := range dm.Regions { - if reg == nil || reg.Avoid || len(reg.Nodes) == 0 { - continue - } - rids = append(rids, id) - } - if len(rids) == 0 { - return false, nil - } - preferredDERP = rids[rand.IntN(len(rids))] - } - - node := dm.Regions[preferredDERP].Nodes[0] - - if strings.HasSuffix(node.HostName, tailcfg.DotInvalid) { - // Don't try to connect to invalid hostnames. This occurred in tests: - // https://github.com/tailscale/tailscale/issues/6207 - // TODO(bradfitz,andrew-d): how to actually handle this nicely? - return false, nil - } - - req, err := http.NewRequestWithContext(ctx, "GET", "http://"+node.HostName+"/generate_204", nil) - if err != nil { - return false, err - } - - // Note: the set of valid characters in a challenge and the total - // length is limited; see isChallengeChar in cmd/derper for more - // details. - chal := "ts_" + node.HostName - req.Header.Set("X-Tailscale-Challenge", chal) - r, err := noRedirectClient.Do(req) - if err != nil { - return false, err - } - defer r.Body.Close() - - expectedResponse := "response " + chal - validResponse := r.Header.Get("X-Tailscale-Response") == expectedResponse - - c.logf("[v2] checkCaptivePortal url=%q status_code=%d valid_response=%v", req.URL.String(), r.StatusCode, validResponse) - return r.StatusCode != 204 || !validResponse, nil -} - // runHTTPOnlyChecks is the netcheck done by environments that can // only do HTTP requests, such as ws/wasm. 
func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *reportState, dm *tailcfg.DERPMap) error { diff --git a/net/netcheck/netcheck_test.go b/net/netcheck/netcheck_test.go index 8b7124744..26e52602a 100644 --- a/net/netcheck/netcheck_test.go +++ b/net/netcheck/netcheck_test.go @@ -15,14 +15,12 @@ import ( "sort" "strconv" "strings" - "sync/atomic" "testing" "time" "tailscale.com/net/netmon" "tailscale.com/net/stun/stuntest" "tailscale.com/tailcfg" - "tailscale.com/tstest" "tailscale.com/tstest/nettest" ) @@ -778,54 +776,6 @@ func TestSortRegions(t *testing.T) { } } -func TestNoCaptivePortalWhenUDP(t *testing.T) { - nettest.SkipIfNoNetwork(t) // empirically. not sure why. - - // Override noRedirectClient to handle the /generate_204 endpoint - var generate204Called atomic.Bool - tr := RoundTripFunc(func(req *http.Request) *http.Response { - if !strings.HasSuffix(req.URL.String(), "/generate_204") { - panic("bad URL: " + req.URL.String()) - } - generate204Called.Store(true) - return &http.Response{ - StatusCode: http.StatusNoContent, - Header: make(http.Header), - } - }) - - tstest.Replace(t, &noRedirectClient.Transport, http.RoundTripper(tr)) - - stunAddr, cleanup := stuntest.Serve(t) - defer cleanup() - - c := newTestClient(t) - c.testEnoughRegions = 1 - // Set the delay long enough that we have time to cancel it - // when our STUN probe succeeds. - c.testCaptivePortalDelay = 10 * time.Second - - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) - defer cancel() - - if err := c.Standalone(ctx, "127.0.0.1:0"); err != nil { - t.Fatal(err) - } - - r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String()), nil) - if err != nil { - t.Fatal(err) - } - - // Should not have called our captive portal function. 
- if generate204Called.Load() { - t.Errorf("captive portal check called; expected no call") - } - if r.CaptivePortal != "" { - t.Errorf("got CaptivePortal=%q, want empty", r.CaptivePortal) - } -} - type RoundTripFunc func(req *http.Request) *http.Response func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index 60a2244dd..d14c64d50 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -145,7 +145,8 @@ type CapabilityVersion int // - 100: 2024-06-18: Client supports filtertype.Match.SrcCaps (issue #12542) // - 101: 2024-07-01: Client supports SSH agent forwarding when handling connections with /bin/su // - 102: 2024-07-12: NodeAttrDisableMagicSockCryptoRouting support -const CurrentCapabilityVersion CapabilityVersion = 102 +// - 103: 2024-07-12: Client supports NodeAttrDisableCaptivePortalDetection +const CurrentCapabilityVersion CapabilityVersion = 103 type StableID string @@ -2327,6 +2328,10 @@ const ( // NodeAttrDisableMagicSockCryptoRouting disables the use of the // magicsock cryptorouting hook. See tailscale/corp#20732. NodeAttrDisableMagicSockCryptoRouting NodeCapability = "disable-magicsock-crypto-routing" + + // NodeAttrDisableCaptivePortalDetection instructs the client to not perform captive portal detection + // automatically when the network state changes. + NodeAttrDisableCaptivePortalDetection NodeCapability = "disable-captive-portal-detection" ) // SetDNSRequest is a request to add a DNS record.