ipn/ipnlocal: remove all the weird locking (LockedOnEntry, UnlockEarly, etc)
Fixes #11649
Updates #16369

Co-authored-by: James Sanderson <jsanderson@tailscale.com>
Change-Id: I63eaa18fe870ddf81d84b949efac4d1b44c3db86
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
parent 08e74effc0, commit 146ea42822
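The pattern being removed is the one where a caller acquires the backend mutex and then passes an unlock token down into helpers named *LockedOnEntry, which may release the lock early before calling out; the SetPrefsForTest hunk below shows the concrete before/after. As a minimal, hypothetical sketch of the two styles (the counter type and method names are illustrative, not the actual LocalBackend code):

package lockstyles

import "sync"

type counter struct {
    mu sync.Mutex
    n  int
}

// Old style (what this commit removes): the caller locks, then hands the
// callee a one-shot unlock token; the callee may "unlock early" before
// calling out to other code.
// Old-style call site: c.mu.Lock(); c.incLockedOnEntry(sync.OnceFunc(c.mu.Unlock))
func (c *counter) incLockedOnEntry(unlock func()) {
    c.n++
    unlock() // release before any callouts
    // ... call out to other code without holding c.mu ...
}

// New style: plain lock/defer-unlock at each entry point; callouts happen
// only after the method returns and the lock is released.
func (c *counter) inc() {
    c.mu.Lock()
    defer c.mu.Unlock()
    c.n++
}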
@@ -615,6 +615,13 @@ func (c *Auto) sendStatus(who string, err error, url string, nm *netmap.NetworkMap)
    // does its thing, which may result in a call back into the client.
    metricQueued.Add(1)
    c.observerQueue.Add(func() {
+        c.mu.Lock()
+        closed := c.closed
+        c.mu.Unlock()
+        if closed {
+            return
+        }
+
        if canSkipStatus(newSt, c.lastStatus.Load()) {
            metricSkippable.Add(1)
            if !c.direct.controlKnobs.DisableSkipStatusQueue.Load() {
@@ -323,7 +323,8 @@ type ProfileStateChangeCallback func(_ ipn.LoginProfileView, _ ipn.PrefsView, sa
// [ProfileStateChangeCallback]s are called first.
//
// It returns a function to be called when the cc is being shut down,
-// or nil if no cleanup is needed.
+// or nil if no cleanup is needed. That cleanup function should not call
+// back into LocalBackend, which may be locked during shutdown.
type NewControlClientCallback func(controlclient.Client, ipn.LoginProfileView) (cleanup func())

// Hooks is a collection of hooks that extensions can add to (non-concurrently)
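As a rough illustration of the tightened contract in the NewControlClientCallback hunk above, here is a hypothetical extension callback of that type (the name startAuditLogger and its body are invented for illustration; how the callback is registered on Hooks is not shown in this hunk):

package exampleext

import (
    "time"

    "tailscale.com/control/controlclient"
    "tailscale.com/ipn"
)

// startAuditLogger matches the NewControlClientCallback signature. It starts
// a goroutine tied to the lifetime of the control client and returns a
// cleanup func that only signals that goroutine to stop. Per the updated doc
// comment, the cleanup must not call back into LocalBackend, which may be
// holding its lock while shutting the control client down.
func startAuditLogger(cc controlclient.Client, profile ipn.LoginProfileView) (cleanup func()) {
    done := make(chan struct{})
    go func() {
        ticker := time.NewTicker(time.Minute)
        defer ticker.Stop()
        for {
            select {
            case <-done:
                return
            case <-ticker.C:
                // Periodic work for this control client goes here; keep
                // LocalBackend out of both this loop and the cleanup below.
            }
        }
    }()
    return func() { close(done) }
}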
File diff suppressed because it is too large
@@ -1503,15 +1503,6 @@ func wantExitNodeIDNotify(want tailcfg.StableNodeID) wantedNotification {
    }
}

-func wantStateNotify(want ipn.State) wantedNotification {
-    return wantedNotification{
-        name: "State=" + want.String(),
-        cond: func(_ testing.TB, _ ipnauth.Actor, n *ipn.Notify) bool {
-            return n.State != nil && *n.State == want
-        },
-    }
-}
-
func TestInternalAndExternalInterfaces(t *testing.T) {
    type interfacePrefix struct {
        i netmon.Interface
@@ -4318,9 +4309,9 @@ func (b *LocalBackend) SetPrefsForTest(newp *ipn.Prefs) {
    if newp == nil {
        panic("SetPrefsForTest got nil prefs")
    }
-    unlock := b.lockAndGetUnlock()
-    defer unlock()
-    b.setPrefsLockedOnEntry(newp, unlock)
+    b.mu.Lock()
+    defer b.mu.Unlock()
+    b.setPrefsLocked(newp)
}

type peerOptFunc func(*tailcfg.Node)
@@ -5808,12 +5799,12 @@ func TestNotificationTargetMatch(t *testing.T) {

type newTestControlFn func(tb testing.TB, opts controlclient.Options) controlclient.Client

-func newLocalBackendWithTestControl(t *testing.T, enableLogging bool, newControl newTestControlFn) *LocalBackend {
+func newLocalBackendWithTestControl(t testing.TB, enableLogging bool, newControl newTestControlFn) *LocalBackend {
    bus := eventbustest.NewBus(t)
    return newLocalBackendWithSysAndTestControl(t, enableLogging, tsd.NewSystemWithBus(bus), newControl)
}

-func newLocalBackendWithSysAndTestControl(t *testing.T, enableLogging bool, sys *tsd.System, newControl newTestControlFn) *LocalBackend {
+func newLocalBackendWithSysAndTestControl(t testing.TB, enableLogging bool, sys *tsd.System, newControl newTestControlFn) *LocalBackend {
    logf := logger.Discard
    if enableLogging {
        logf = tstest.WhileTestRunningLogger(t)
@@ -1542,6 +1542,11 @@ func TestEngineReconfigOnStateChange(t *testing.T) {
            tt.steps(t, lb, cc)
        }

+        // TODO(bradfitz): this whole event bus settling thing
+        // should be unnecessary once the bogus uses of eventbus
+        // are removed. (https://github.com/tailscale/tailscale/issues/16369)
+        lb.settleEventBus()
+
        if gotState := lb.State(); gotState != tt.wantState {
            t.Errorf("State: got %v; want %v", gotState, tt.wantState)
        }
@@ -1572,35 +1577,30 @@ func TestEngineReconfigOnStateChange(t *testing.T) {
    }
}

-// TestStateMachineURLRace tests that wgengine updates arriving in the middle of
+// TestSendPreservesAuthURL tests that wgengine updates arriving in the middle of
// processing an auth URL doesn't result in the auth URL being cleared.
-func TestStateMachineURLRace(t *testing.T) {
-    runTestStateMachineURLRace(t, false)
+func TestSendPreservesAuthURL(t *testing.T) {
+    runTestSendPreservesAuthURL(t, false)
}

-func TestStateMachineURLRaceSeamless(t *testing.T) {
-    runTestStateMachineURLRace(t, true)
+func TestSendPreservesAuthURLSeamless(t *testing.T) {
+    runTestSendPreservesAuthURL(t, true)
}

-func runTestStateMachineURLRace(t *testing.T, seamless bool) {
+func runTestSendPreservesAuthURL(t *testing.T, seamless bool) {
    var cc *mockControl
    b := newLocalBackendWithTestControl(t, true, func(tb testing.TB, opts controlclient.Options) controlclient.Client {
        cc = newClient(t, opts)
        return cc
    })

    nw := newNotificationWatcher(t, b, &ipnauth.TestActor{})

    t.Logf("Start")
    nw.watch(0, []wantedNotification{
        wantStateNotify(ipn.NeedsLogin)})
    b.Start(ipn.Options{
        UpdatePrefs: &ipn.Prefs{
            WantRunning: true,
            ControlURL: "https://localhost:1/",
        },
    })
    nw.check()

    t.Logf("LoginFinished")
    cc.persist.UserProfile.LoginName = "user1"
@@ -1610,72 +1610,16 @@ func runTestStateMachineURLRace(t *testing.T, seamless bool) {
        b.sys.ControlKnobs().SeamlessKeyRenewal.Store(true)
    }

    nw.watch(0, []wantedNotification{
        wantStateNotify(ipn.Starting)})
    cc.send(sendOpt{loginFinished: true, nm: &netmap.NetworkMap{
        SelfNode: (&tailcfg.Node{MachineAuthorized: true}).View(),
    }})
    nw.check()

    t.Logf("Running")
    nw.watch(0, []wantedNotification{
        wantStateNotify(ipn.Running)})
    b.setWgengineStatus(&wgengine.Status{AsOf: time.Now(), DERPs: 1}, nil)
    nw.check()

    t.Logf("Re-auth (StartLoginInteractive)")
    b.StartLoginInteractive(t.Context())

    stop := make(chan struct{})
    stopSpamming := sync.OnceFunc(func() {
        stop <- struct{}{}
    })
    // if seamless renewal is enabled, the engine won't be disabled, and we won't
    // ever call stopSpamming, so make sure it does get called
    defer stopSpamming()

    // Intercept updates between the engine and localBackend, so that we can see
    // when the "stopped" update comes in and ensure we stop sending our "we're
    // up" updates after that point.
    b.e.SetStatusCallback(func(s *wgengine.Status, err error) {
        // This is not one of our fake status updates, this is generated from the
        // engine in response to LocalBackend calling RequestStatus. Stop spamming
        // our fake statuses.
        //
        // TODO(zofrex): This is fragile, it works right now but would break if the
        // calling pattern of RequestStatus changes. We should ensure that we keep
        // sending "we're up" statuses right until Reconfig is called with
        // zero-valued configs, and after that point only send "stopped" statuses.
        stopSpamming()

        // Once stopSpamming returns we are guaranteed to not send any more updates,
        // so we can now send the real update (indicating shutdown) and be certain
        // it will be received after any fake updates we sent. This is possibly a
        // stronger guarantee than we get from the real engine?
        b.setWgengineStatus(s, err)
    })

    // time needs to be >= last time for the status to be accepted, send all our
    // spam with the same stale time so that when a real update comes in it will
    // definitely be accepted.
    time := b.lastStatusTime

    // Flood localBackend with a lot of wgengine status updates, so if there are
    // any race conditions in the multiple locks/unlocks that happen as we process
    // the received auth URL, we will hit them.
    go func() {
        t.Logf("sending lots of fake wgengine status updates")
        for {
            select {
            case <-stop:
                t.Logf("stopping fake wgengine status updates")
                return
            default:
                b.setWgengineStatus(&wgengine.Status{AsOf: time, DERPs: 1}, nil)
            }
        }
    }()

    t.Logf("Re-auth (receive URL)")
    url1 := "https://localhost:1/1"
    cc.send(sendOpt{url: url1})
@@ -1685,122 +1629,11 @@ func runTestStateMachineURLRace(t *testing.T, seamless bool) {
    // status update to trample it have ended as well.
    if b.authURL == "" {
        t.Fatalf("expected authURL to be set")
    } else {
        t.Log("authURL was set")
    }
}

-func TestWGEngineDownThenUpRace(t *testing.T) {
-    var cc *mockControl
-    b := newLocalBackendWithTestControl(t, true, func(tb testing.TB, opts controlclient.Options) controlclient.Client {
-        cc = newClient(t, opts)
-        return cc
-    })
-
-    nw := newNotificationWatcher(t, b, &ipnauth.TestActor{})
-
-    t.Logf("Start")
-    nw.watch(0, []wantedNotification{
-        wantStateNotify(ipn.NeedsLogin)})
-    b.Start(ipn.Options{
-        UpdatePrefs: &ipn.Prefs{
-            WantRunning: true,
-            ControlURL: "https://localhost:1/",
-        },
-    })
-    nw.check()
-
-    t.Logf("LoginFinished")
-    cc.persist.UserProfile.LoginName = "user1"
-    cc.persist.NodeID = "node1"
-
-    nw.watch(0, []wantedNotification{
-        wantStateNotify(ipn.Starting)})
-    cc.send(sendOpt{loginFinished: true, nm: &netmap.NetworkMap{
-        SelfNode: (&tailcfg.Node{MachineAuthorized: true}).View(),
-    }})
-    nw.check()
-
-    nw.watch(0, []wantedNotification{
-        wantStateNotify(ipn.Running)})
-    b.setWgengineStatus(&wgengine.Status{AsOf: time.Now(), DERPs: 1}, nil)
-    nw.check()
-
-    t.Logf("Re-auth (StartLoginInteractive)")
-    b.StartLoginInteractive(t.Context())
-
-    var timeLock sync.RWMutex
-    timestamp := b.lastStatusTime
-
-    engineShutdown := make(chan struct{})
-    gotShutdown := sync.OnceFunc(func() {
-        t.Logf("engineShutdown")
-        engineShutdown <- struct{}{}
-    })
-
-    b.e.SetStatusCallback(func(s *wgengine.Status, err error) {
-        timeLock.Lock()
-        if s.AsOf.After(timestamp) {
-            timestamp = s.AsOf
-        }
-        timeLock.Unlock()
-
-        if err != nil || (s.DERPs == 0 && len(s.Peers) == 0) {
-            gotShutdown()
-        } else {
-            b.setWgengineStatus(s, err)
-        }
-    })
-
-    t.Logf("Re-auth (receive URL)")
-    url1 := "https://localhost:1/1"
-
-    done := make(chan struct{})
-    var wg sync.WaitGroup
-
-    wg.Go(func() {
-        t.Log("cc.send starting")
-        cc.send(sendOpt{url: url1}) // will block until engine stops
-        t.Log("cc.send returned")
-    })
-
-    <-engineShutdown // will get called once cc.send is blocked
-    gotShutdown = sync.OnceFunc(func() {
-        t.Logf("engineShutdown")
-        engineShutdown <- struct{}{}
-    })
-
-    wg.Go(func() {
-        t.Log("StartLoginInteractive starting")
-        b.StartLoginInteractive(t.Context()) // will also block until engine stops
-        t.Log("StartLoginInteractive returned")
-    })
-
-    <-engineShutdown // will get called once StartLoginInteractive is blocked
-
-    st := controlclient.Status{}
-    st.SetStateForTest(controlclient.StateAuthenticated)
-    b.SetControlClientStatus(cc, st)
-
-    timeLock.RLock()
-    b.setWgengineStatus(&wgengine.Status{AsOf: timestamp}, nil) // engine is down event finally arrives
-    b.setWgengineStatus(&wgengine.Status{AsOf: timestamp, DERPs: 1}, nil) // engine is back up
-    timeLock.RUnlock()
-
-    go func() {
-        wg.Wait()
-        done <- struct{}{}
-    }()
-
-    t.Log("waiting for .send and .StartLoginInteractive to return")
-
-    select {
-    case <-done:
-    case <-time.After(10 * time.Second):
-        t.Fatalf("timed out waiting")
-    }
-
-    t.Log("both returned")
-}
-
func buildNetmapWithPeers(self tailcfg.NodeView, peers ...tailcfg.NodeView) *netmap.NetworkMap {
    const (
        firstAutoUserID = tailcfg.UserID(10000)
@@ -2033,6 +1866,14 @@ func (e *mockEngine) RequestStatus() {
    }
}

+func (e *mockEngine) ResetAndStop() (*wgengine.Status, error) {
+    err := e.Reconfig(&wgcfg.Config{}, &router.Config{}, &dns.Config{})
+    if err != nil {
+        return nil, err
+    }
+    return &wgengine.Status{AsOf: time.Now()}, nil
+}
+
func (e *mockEngine) PeerByKey(key.NodePublic) (_ wgint.Peer, ok bool) {
    return wgint.Peer{}, false
}
@@ -12,6 +12,8 @@ import (

type ExecQueue struct {
    mu         sync.Mutex
+    ctx        context.Context    // context.Background + closed on Shutdown
+    cancel     context.CancelFunc // closes ctx
    closed     bool
    inFlight   bool          // whether a goroutine is running q.run
    doneWaiter chan struct{} // non-nil if waiter is waiting, then closed
@@ -24,6 +26,7 @@ func (q *ExecQueue) Add(f func()) {
    if q.closed {
        return
    }
+    q.initCtxLocked()
    if q.inFlight {
        q.queue = append(q.queue, f)
    } else {
@@ -79,18 +82,32 @@ func (q *ExecQueue) Shutdown() {
    q.mu.Lock()
    defer q.mu.Unlock()
    q.closed = true
+    if q.cancel != nil {
+        q.cancel()
+    }
}

-// Wait waits for the queue to be empty.
+func (q *ExecQueue) initCtxLocked() {
+    if q.ctx == nil {
+        q.ctx, q.cancel = context.WithCancel(context.Background())
+    }
+}
+
+// Wait waits for the queue to be empty or shut down.
func (q *ExecQueue) Wait(ctx context.Context) error {
    q.mu.Lock()
+    q.initCtxLocked()
    waitCh := q.doneWaiter
    if q.inFlight && waitCh == nil {
        waitCh = make(chan struct{})
        q.doneWaiter = waitCh
    }
+    closed := q.closed
    q.mu.Unlock()

+    if closed {
+        return errors.New("execqueue shut down")
+    }
    if waitCh == nil {
        return nil
    }
@@ -98,6 +115,8 @@ func (q *ExecQueue) Wait(ctx context.Context) error {
    select {
    case <-waitCh:
        return nil
+    case <-q.ctx.Done():
+        return errors.New("execqueue shut down")
    case <-ctx.Done():
        return ctx.Err()
    }
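A small usage sketch of the shutdown-aware queue, assuming the package lives at tailscale.com/util/execqueue (the repo layout and the "execqueue shut down" error string suggest so); the jobs and timeout are illustrative:

package main

import (
    "context"
    "fmt"
    "time"

    "tailscale.com/util/execqueue"
)

func main() {
    var q execqueue.ExecQueue
    q.Add(func() { fmt.Println("first job") })
    q.Add(func() { time.Sleep(50 * time.Millisecond); fmt.Println("second job") })

    // Wait returns nil once the queue has drained (or ctx.Err on timeout).
    ctx, cancel := context.WithTimeout(context.Background(), time.Second)
    defer cancel()
    if err := q.Wait(ctx); err != nil {
        fmt.Println("wait:", err)
    }

    // With this change, Wait no longer hangs on a shut-down queue: it
    // returns an "execqueue shut down" error instead.
    q.Shutdown()
    if err := q.Wait(context.Background()); err != nil {
        fmt.Println("wait after shutdown:", err)
    }
}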
@@ -47,6 +47,7 @@ import (
    "tailscale.com/types/logger"
    "tailscale.com/types/netmap"
    "tailscale.com/types/views"
+    "tailscale.com/util/backoff"
    "tailscale.com/util/checkchange"
    "tailscale.com/util/clientmetric"
    "tailscale.com/util/eventbus"
@@ -924,6 +925,32 @@ func hasOverlap(aips, rips views.Slice[netip.Prefix]) bool {
    return false
}

+// ResetAndStop resets the engine to a clean state (like calling Reconfig
+// with all pointers to zero values) and waits for it to be fully stopped,
+// with no live peers or DERPs.
+//
+// Unlike Reconfig, it does not return ErrNoChanges.
+//
+// If the engine stops, returns the status. NB that this status will not be sent
+// to the registered status callback, it is on the caller to ensure this status
+// is handled appropriately.
+func (e *userspaceEngine) ResetAndStop() (*Status, error) {
+    if err := e.Reconfig(&wgcfg.Config{}, &router.Config{}, &dns.Config{}); err != nil && !errors.Is(err, ErrNoChanges) {
+        return nil, err
+    }
+    bo := backoff.NewBackoff("UserspaceEngineResetAndStop", e.logf, 1*time.Second)
+    for {
+        st, err := e.getStatus()
+        if err != nil {
+            return nil, err
+        }
+        if len(st.Peers) == 0 && st.DERPs == 0 {
+            return st, nil
+        }
+        bo.BackOff(context.Background(), fmt.Errorf("waiting for engine to stop: peers=%d derps=%d", len(st.Peers), st.DERPs))
+    }
+}
+
func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, dnsCfg *dns.Config) error {
    if routerCfg == nil {
        panic("routerCfg must not be nil")
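A hedged sketch of how a caller might use the new method in place of the old "zero-valued Reconfig, then wait for the stopped status callback" sequence; the resetEngine helper and its logging are assumptions for illustration, and the real call sites are in the suppressed ipn/ipnlocal diff above:

package examplecaller

import (
    "tailscale.com/types/logger"
    "tailscale.com/wgengine"
)

// resetEngine stops the engine synchronously before, say, re-keying.
func resetEngine(eng wgengine.Engine, logf logger.Logf) error {
    st, err := eng.ResetAndStop()
    if err != nil {
        return err
    }
    // Per the doc comment, this status is not delivered to the registered
    // status callback; the caller is responsible for acting on it.
    logf("engine stopped as of %v: peers=%d derps=%d", st.AsOf, len(st.Peers), st.DERPs)
    return nil
}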
@@ -124,6 +124,12 @@ func (e *watchdogEngine) watchdog(name string, fn func()) {
func (e *watchdogEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, dnsCfg *dns.Config) error {
    return e.watchdogErr("Reconfig", func() error { return e.wrap.Reconfig(cfg, routerCfg, dnsCfg) })
}
+func (e *watchdogEngine) ResetAndStop() (st *Status, err error) {
+    e.watchdog("ResetAndStop", func() {
+        st, err = e.wrap.ResetAndStop()
+    })
+    return st, err
+}
func (e *watchdogEngine) GetFilter() *filter.Filter {
    return e.wrap.GetFilter()
}
@@ -69,6 +69,13 @@ type Engine interface {
    // The returned error is ErrNoChanges if no changes were made.
    Reconfig(*wgcfg.Config, *router.Config, *dns.Config) error

+    // ResetAndStop resets the engine to a clean state (like calling Reconfig
+    // with all pointers to zero values) and waits for it to be fully stopped,
+    // with no live peers or DERPs.
+    //
+    // Unlike Reconfig, it does not return ErrNoChanges.
+    ResetAndStop() (*Status, error)
+
    // PeerForIP returns the node to which the provided IP routes,
    // if any. If none is found, (nil, false) is returned.
    PeerForIP(netip.Addr) (_ PeerForIP, ok bool)