cmd/tailscale/cli: only print authURLs and device approval URLs once

This patch fixes several issues related to printing login and device
approval URLs, especially when `tailscale up` is interrupted:

1.  Only print a login URL that will cause `tailscale up` to complete.
    Don't print expired URLs or URLs from previous login attempts.

2.  Print the device approval URL if you run `tailscale up` after
    previously completing a login, but before approving the device.

3.  Use the correct control URL for device approval if you run a bare
    `tailscale up` after previously completing a login, but before
    approving the device.

4.  Don't print the device approval URL more than once (or at least,
    not consecutively).

Updates tailscale/corp#31476
Updates #17361

## How these fixes work

This patch went through a lot of trial and error, and there may still
be bugs! These notes capture the different scenarios and considerations
as we wrote it, which are also captured by integration tests.

1.  We were getting stale login URLs from the initial IPN state
    notification.

    When the IPN watcher was moved to before Start() in c011369, we
    mistakenly continued to request the initial state. This is only
    necessary if you start watching after you call Start(), because
    you may have missed some notifications.

    By getting the initial state before calling Start(), we'd get
    a stale login URL. If you clicked that URL, you could complete
    the login in the control server (if it wasn't expired), but your
    instance of `tailscale up` would hang, because it's listening for
    login updates from a different login URL.

    In this patch, we no longer request the initial state, and so we
    don't print a stale URL.

2.  Once you skip the initial state from IPN, the following sequence:

    *   Run `tailscale up`
    *   Log into a tailnet with device approval
    *   ^C after the device approval URL is printed, but without approving
    *   Run `tailscale up` again

    means that nothing would ever be printed.

    `tailscale up` would send tailscaled the pref `WantRunning: true`,
    but that was already the case so nothing changes. You never get any
    IPN notifications, and in particular you never get a state change to
    `NeedsMachineAuth`. This means we'd never print the device approval URL.

    In this patch, we add a hard-coded rule that if you're doing a simple up
    (which won't trigger any other IPN notifications) and you start in the
    `NeedsMachineAuth` state, we print the device approval message without
    waiting for an IPN notification.

3.  Consider the following sequence:

    *   Run `tailscale up --login-server=<custom server>`
    *   Log into a tailnet with device approval
    *   ^C after the device approval URL is printed, but without approving
    *   Run `tailscale up` again

    We'd print the device approval URL for the default control server,
    rather than the real control server, because we were using the `prefs`
    from the CLI arguments (which are all the defaults) rather than the
    `curPrefs` (which contain the custom login server).

    In this patch, we use the `prefs` if the user has specified any settings
    (and other code will ensure this is a complete set of settings) or
    `curPrefs` if it's a simple `tailscale up`.

4.  Consider the following sequence: you've logged in, but not completed
    device approval, and you run `down` and `up` in quick succession.

    *   `up`: sees state=NeedsMachineAuth
    *   `up`: sends `{wantRunning: true}`, prints out the device approval URL
    *   `down`: changes state to Stopped
    *   `up`: changes state to Starting
    *   tailscaled: changes state to NeedsMachineAuth
    *   `up`: gets an IPN notification with the state change, and prints
        a second device approval URL

    Either URL works, but this is annoying for the user.

    In this patch, we track whether the last printed URL was the device
    approval URL, and if so, we skip printing it a second time.

Signed-off-by: Alex Chan <alexc@tailscale.com>
This commit is contained in:
Alex Chan 2025-10-06 17:17:52 +01:00 committed by Alex Chan
parent bb6bd46570
commit b7fe1cea9f
3 changed files with 226 additions and 13 deletions

View File

@ -357,6 +357,13 @@ func netfilterModeFromFlag(v string) (_ preftype.NetfilterMode, warning string,
// It returns simpleUp if we're running a simple "tailscale up" to
// transition to running from a previously-logged-in but down state,
// without changing any settings.
//
// Note this can also mutate prefs to add implicit preferences for the
// user operator.
//
// TODO(alexc): the name of this function is confusing, and perhaps a
// sign that it's doing too much. Consider refactoring this so it's just
// telling the caller what to do next, but not changing anything itself.
func updatePrefs(prefs, curPrefs *ipn.Prefs, env upCheckEnv) (simpleUp bool, justEditMP *ipn.MaskedPrefs, err error) {
if !env.upArgs.reset {
applyImplicitPrefs(prefs, curPrefs, env)
@ -497,6 +504,8 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
if err != nil {
return err
}
effectivePrefs := curPrefs
if cmd == "up" {
// "tailscale up" should not be able to change the
// profile name.
@ -546,10 +555,8 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
// or we could miss IPN notifications.
//
// In particular, if we're doing a force-reauth, we could miss the
// notification with the auth URL we should print for the user. The
// initial state could contain the auth URL, but only if IPN is in the
// NeedsLogin state -- sometimes it's in Starting, and we don't get the URL.
watcher, err := localClient.WatchIPNBus(watchCtx, ipn.NotifyInitialState)
// notification with the auth URL we should print for the user.
watcher, err := localClient.WatchIPNBus(watchCtx, 0)
if err != nil {
return err
}
@ -591,6 +598,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
if err != nil {
return err
}
effectivePrefs = prefs
if upArgs.forceReauth || !st.HaveNodeKey {
err := localClient.StartLoginInteractive(ctx)
if err != nil {
@ -604,7 +612,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
go func() {
var printed bool // whether we've yet printed anything to stdout or stderr
var lastURLPrinted string
lastURLPrinted := ""
// If we're doing a force-reauth, we need to get two notifications:
//
@ -617,6 +625,15 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
ipnIsRunning := false
waitingForKeyChange := upArgs.forceReauth
// If we're doing a simple up (i.e. `tailscale up`, no flags) and
// the initial state is NeedsMachineAuth, then we never receive a
// state notification from ipn, so we print the device approval URL
// immediately.
if simpleUp && st.BackendState == ipn.NeedsMachineAuth.String() {
printed = true
printDeviceApprovalInfo(env.upArgs.json, effectivePrefs, &lastURLPrinted)
}
for {
n, err := watcher.Next()
if err != nil {
@ -629,11 +646,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
}
if s := n.State; s != nil && *s == ipn.NeedsMachineAuth {
printed = true
if env.upArgs.json {
printUpDoneJSON(ipn.NeedsMachineAuth, "")
} else {
fmt.Fprintf(Stderr, "\nTo approve your machine, visit (as admin):\n\n\t%s\n\n", prefs.AdminPageURL(policyclient.Get()))
}
printDeviceApprovalInfo(env.upArgs.json, effectivePrefs, &lastURLPrinted)
}
if s := n.State; s != nil {
ipnIsRunning = *s == ipn.Running
@ -737,6 +750,21 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
}
}
func printDeviceApprovalInfo(printJson bool, prefs *ipn.Prefs, lastURLPrinted *string) {
if printJson {
printUpDoneJSON(ipn.NeedsMachineAuth, "")
} else {
deviceApprovalURL := prefs.AdminPageURL(policyclient.Get())
if lastURLPrinted != nil && deviceApprovalURL == *lastURLPrinted {
return
}
*lastURLPrinted = deviceApprovalURL
errf("\nTo approve your machine, visit (as admin):\n\n\t%s\n\n", deviceApprovalURL)
}
}
// upWorthWarning reports whether the health check message s is worth warning
// about during "tailscale up". Many of the health checks are noisy or confusing
// or very ephemeral and happen especially briefly at startup.

View File

@ -294,13 +294,14 @@ func completeLogin(t *testing.T, control *testcontrol.Server, counter *atomic.In
// This handler receives device approval URLs, and approves the device.
//
// It counts how many URLs it sees, and will fail the test if it
// sees multiple device approval URLs.
// sees multiple device approval URLs, or if you try to approve a device
// with the wrong control server.
func completeDeviceApproval(t *testing.T, node *TestNode, counter *atomic.Int32) func(string) error {
return func(urlStr string) error {
control := node.env.Control
nodeKey := node.MustStatus().Self.PublicKey
t.Logf("saw device approval URL %q", urlStr)
if control.CompleteDeviceApproval(&nodeKey) {
if control.CompleteDeviceApproval(node.env.ControlURL(), urlStr, &nodeKey) {
if counter.Add(1) > 1 {
err := errors.New("completed multiple device approval URLs")
t.Error(err)
@ -513,6 +514,182 @@ func TestOneNodeUpAuth(t *testing.T) {
}
}
// Returns true if the error returned by [exec.Run] fails with a non-zero
// exit code, false otherwise.
func isNonZeroExitCode(err error) bool {
if err == nil {
return false
}
exitError, ok := err.(*exec.ExitError)
if !ok {
return false
}
return exitError.ExitCode() != 0
}
// If we interrupt `tailscale up` and then run it again, we should only
// print a single auth URL.
func TestOneNodeUpInterruptedAuth(t *testing.T) {
tstest.Shard(t)
tstest.Parallel(t)
env := NewTestEnv(t, ConfigureControl(
func(control *testcontrol.Server) {
control.RequireAuth = true
control.AllNodesSameUser = true
},
))
n := NewTestNode(t, env)
d := n.StartDaemon()
defer d.MustCleanShutdown(t)
cmdArgs := []string{"up", "--login-server=" + env.ControlURL()}
// The first time we run the command, we wait for an auth URL to be
// printed, and then we cancel the command -- equivalent to ^C.
//
// At this point, we've connected to control to get an auth URL,
// and printed it in the CLI, but not clicked it.
t.Logf("Running command for the first time: %s", strings.Join(cmdArgs, " "))
cmd1 := n.Tailscale(cmdArgs...)
// This handler watches for auth URLs in stdout, then cancels the
// running `tailscale up` CLI command.
cmd1.Stdout = &authURLParserWriter{t: t, authURLFn: func(urlStr string) error {
t.Logf("saw auth URL %q", urlStr)
cmd1.Process.Kill()
return nil
}}
cmd1.Stderr = cmd1.Stdout
if err := cmd1.Run(); !isNonZeroExitCode(err) {
t.Fatalf("Command did not fail with non-zero exit code: %q", err)
}
// Because we didn't click the auth URL, we should still be in NeedsLogin.
n.AwaitBackendState("NeedsLogin")
// The second time we run the command, we click the first auth URL we see
// and check that we log in correctly.
//
// In #17361, there was a bug where we'd print two auth URLs, and you could
// click either auth URL and log in to control, but logging in through the
// first URL would leave `tailscale up` hanging.
//
// Using `authURLHandler` ensures we only print the new, correct auth URL.
//
// If we print both URLs, it will throw an error because it only expects
// to log in with one auth URL.
//
// If we only print the stale auth URL, the test will timeout because
// `tailscale up` will never return.
t.Logf("Running command for the second time: %s", strings.Join(cmdArgs, " "))
var authURLCount atomic.Int32
cmd2 := n.Tailscale(cmdArgs...)
cmd2.Stdout = &authURLParserWriter{
t: t, authURLFn: completeLogin(t, env.Control, &authURLCount),
}
cmd2.Stderr = cmd2.Stdout
if err := cmd2.Run(); err != nil {
t.Fatalf("up: %v", err)
}
if urls := authURLCount.Load(); urls != 1 {
t.Errorf("Auth URLs completed = %d; want %d", urls, 1)
}
n.AwaitRunning()
}
// If we interrupt `tailscale up` and login successfully, but don't
// complete the device approval, we should see the device approval URL
// when we run `tailscale up` a second time.
func TestOneNodeUpInterruptedDeviceApproval(t *testing.T) {
tstest.Shard(t)
tstest.Parallel(t)
env := NewTestEnv(t, ConfigureControl(
func(control *testcontrol.Server) {
control.RequireAuth = true
control.RequireMachineAuth = true
control.AllNodesSameUser = true
},
))
n := NewTestNode(t, env)
d := n.StartDaemon()
defer d.MustCleanShutdown(t)
// The first time we run the command, we:
//
// * set a custom login URL
// * wait for an auth URL to be printed
// * click it to complete the login process
// * wait for a device approval URL to be printed
// * cancel the command, equivalent to ^C
//
// At this point, we've logged in to control, but our node isn't
// approved to connect to the tailnet.
cmd1Args := []string{"up", "--login-server=" + env.ControlURL()}
t.Logf("Running command: %s", strings.Join(cmd1Args, " "))
cmd1 := n.Tailscale(cmd1Args...)
handler1 := &authURLParserWriter{t: t,
authURLFn: completeLogin(t, env.Control, &atomic.Int32{}),
deviceApprovalURLFn: func(urlStr string) error {
t.Logf("saw device approval URL %q", urlStr)
cmd1.Process.Kill()
return nil
},
}
cmd1.Stdout = handler1
cmd1.Stderr = cmd1.Stdout
if err := cmd1.Run(); !isNonZeroExitCode(err) {
t.Fatalf("Command did not fail with non-zero exit code: %q", err)
}
// Because we logged in but we didn't complete the device approval, we
// should be in state NeedsMachineAuth.
n.AwaitBackendState("NeedsMachineAuth")
// The second time we run the command, we expect not to get an auth URL
// and go straight to the device approval URL. We don't need to pass the
// login server, because `tailscale up` should remember our control URL.
cmd2Args := []string{"up"}
t.Logf("Running command: %s", strings.Join(cmd2Args, " "))
var deviceApprovalURLCount atomic.Int32
cmd2 := n.Tailscale(cmd2Args...)
cmd2.Stdout = &authURLParserWriter{t: t,
authURLFn: func(urlStr string) error {
t.Fatalf("got unexpected auth URL: %q", urlStr)
cmd2.Process.Kill()
return nil
},
deviceApprovalURLFn: completeDeviceApproval(t, n, &deviceApprovalURLCount),
}
cmd2.Stderr = cmd2.Stdout
if err := cmd2.Run(); err != nil {
t.Fatalf("up: %v", err)
}
wantDeviceApprovalURLCount := int32(1)
if n := deviceApprovalURLCount.Load(); n != wantDeviceApprovalURLCount {
t.Errorf("Device approval URLs completed = %d; want %d", n, wantDeviceApprovalURLCount)
}
n.AwaitRunning()
}
func TestConfigFileAuthKey(t *testing.T) {
tstest.SkipOnUnshardedCI(t)
tstest.Shard(t)

View File

@ -687,7 +687,11 @@ func (s *Server) CompleteAuth(authPathOrURL string) bool {
return true
}
func (s *Server) CompleteDeviceApproval(nodeKey *key.NodePublic) bool {
// Complete the device approval for this node.
//
// This function returns false if the node does not exist, or you try to
// approve a device against a different control server.
func (s *Server) CompleteDeviceApproval(controlUrl string, urlStr string, nodeKey *key.NodePublic) bool {
s.mu.Lock()
defer s.mu.Unlock()
@ -696,6 +700,10 @@ func (s *Server) CompleteDeviceApproval(nodeKey *key.NodePublic) bool {
return false
}
if urlStr != controlUrl+"/admin" {
return false
}
sendUpdate(s.updates[node.ID], updateSelfChanged)
node.MachineAuthorized = true