Add a test for a rare shutdown hang

A config reload that needs to stop some providers can sometimes hang if SIGTERM is sent to Prometheus at the same time:

1: sync.WaitGroup.Wait [83 minutes] [Created by run.(*Group).Run in goroutine 1 @ group.go:37]
    sync         sema.go:110              runtime_SemacquireWaitGroup(*uint32(#166))
    sync         waitgroup.go:118         (*WaitGroup).Wait(*WaitGroup(#23))
    discovery    manager.go:276           (*Manager).ApplyConfig(#23, #167)
    main         main.go:964              main.func5(#120)
    main         main.go:1505             reloadConfig({#183, 0x1b}, 1, #40, #43, #50, {#31, 0xa, 0})
    main         main.go:1182             main.func22()
    run          group.go:38              (*Group).Run.func1(*Group(#26), #51)

Add a test for it.

Signed-off-by: Lukasz Mierzwa <l.mierzwa@gmail.com>
This commit is contained in:
Lukasz Mierzwa 2025-07-02 16:01:42 +01:00
parent d902abc50d
commit 357e652044

View File

@ -1562,3 +1562,53 @@ func TestUnregisterMetrics(t *testing.T) {
cancel()
}
}
// Calling ApplyConfig() that removes providers at the same time as shutting down
// the manager should not hang.
//
// The test has no explicit assertion for the hang: if the background
// ApplyConfig() call never returns, wgBg.Wait() at the end blocks and the test
// only fails once the surrounding test run times out.
func TestConfigReloadAndShutdownRace(t *testing.T) {
// Use a dedicated registry and metrics so this test does not share state
// with other tests.
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
// mgrCtx is the manager's own context; cancelling it below is how the test
// triggers the shutdown.
mgrCtx, mgrCancel := context.WithCancel(context.Background())
discoveryManager := NewManager(mgrCtx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
// NOTE(review): updatert is presumably the interval at which the manager
// emits updates; lowered here to keep the test fast — confirm in manager.go.
discoveryManager.updatert = 100 * time.Millisecond
// Run the manager in the background; wgDiscovery lets the test wait until
// Run() has returned.
var wgDiscovery sync.WaitGroup
wgDiscovery.Add(1)
go func() {
discoveryManager.Run()
wgDiscovery.Done()
}()
// Presumably gives the Run() goroutine time to start before configs are applied.
time.Sleep(time.Millisecond * 200)
// wgBg tracks the helper goroutines started below: the sync-channel consumer
// and the concurrent ApplyConfig() call.
var wgBg sync.WaitGroup
updateChan := discoveryManager.SyncCh()
wgBg.Add(1)
// ctx is separate from mgrCtx so the consumer can be stopped independently
// of the manager shutdown.
ctx, cancel := context.WithCancel(context.Background())
// Consumer: receives at most one update from the sync channel, or exits
// early when ctx is cancelled.
go func() {
defer wgBg.Done()
select {
case <-ctx.Done():
return
case <-updateChan:
}
}()
// First apply a config with a single static target set under "prometheus"
// so that there is something to remove on the next reload.
c := map[string]Configs{
"prometheus": {staticConfig("bar:9090")},
}
discoveryManager.ApplyConfig(c)
// Now drop that entry and apply the (empty) config from a goroutine, so the
// reload runs concurrently with the mgrCancel() call below.
delete(c, "prometheus")
wgBg.Add(1)
go func() {
discoveryManager.ApplyConfig(c)
wgBg.Done()
}()
// Shut the manager down while the reload above may still be in flight, then
// wait for Run() to return.
mgrCancel()
wgDiscovery.Wait()
// Release the consumer (in case it never received an update) and wait for
// both background goroutines; this blocks forever if ApplyConfig() hangs.
cancel()
wgBg.Wait()
}