From 357e6520449004565aeb4a7ed6c05f74c5cd1d87 Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Wed, 2 Jul 2025 16:01:42 +0100 Subject: [PATCH] Add a test for a rare shutdown hang Doing a config reload that needs to stop some providers while also sending SIGTERM to Prometheus at the same time can sometimes hang: 1: sync.WaitGroup.Wait [83 minutes] [Created by run.(*Group).Run in goroutine 1 @ group.go:37] sync sema.go:110 runtime_SemacquireWaitGroup(*uint32(#166)) sync waitgroup.go:118 (*WaitGroup).Wait(*WaitGroup(#23)) discovery manager.go:276 (*Manager).ApplyConfig(#23, #167) main main.go:964 main.func5(#120) main main.go:1505 reloadConfig({#183, 0x1b}, 1, #40, #43, #50, {#31, 0xa, 0}) main main.go:1182 main.func22() run group.go:38 (*Group).Run.func1(*Group(#26), #51) Add a test for it. Signed-off-by: Lukasz Mierzwa --- discovery/manager_test.go | 50 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 1dd10baf47..38a93be9f4 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -1562,3 +1562,53 @@ func TestUnregisterMetrics(t *testing.T) { cancel() } } + +// Calling ApplyConfig() that removes providers at the same time as shutting down +// the manager should not hang. 
+func TestConfigReloadAndShutdownRace(t *testing.T) {
+	reg := prometheus.NewRegistry()
+	_, sdMetrics := NewTestMetrics(t, reg)
+
+	mgrCtx, mgrCancel := context.WithCancel(context.Background())
+	// Cancelling twice is harmless; the defer only matters if the test
+	// aborts before reaching the explicit mgrCancel() below.
+	defer mgrCancel()
+	discoveryManager := NewManager(mgrCtx, promslog.NewNopLogger(), reg, sdMetrics)
+	require.NotNil(t, discoveryManager)
+	discoveryManager.updatert = 100 * time.Millisecond
+
+	// Run the manager in the background; wgDiscovery is released when Run returns.
+	var wgDiscovery sync.WaitGroup
+	wgDiscovery.Add(1)
+	go func() {
+		defer wgDiscovery.Done()
+		discoveryManager.Run()
+	}()
+	// NOTE(review): presumably gives Run time to start before configs are
+	// applied; there is no explicit readiness signal visible here.
+	time.Sleep(time.Millisecond * 200)
+
+	var wgBg sync.WaitGroup
+	updateChan := discoveryManager.SyncCh()
+	wgBg.Add(1)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	// Receive at most one update from the sync channel, or give up once ctx
+	// is cancelled at the end of the test.
+	go func() {
+		defer wgBg.Done()
+		select {
+		case <-ctx.Done():
+			return
+		case <-updateChan:
+		}
+	}()
+
+	// Register a single static provider so that the next reload has
+	// something to remove.
+	c := map[string]Configs{
+		"prometheus": {staticConfig("bar:9090")},
+	}
+	discoveryManager.ApplyConfig(c)
+
+	// Apply a config without that provider in the background and cancel the
+	// manager context right away, so the reload and the shutdown race.
+	delete(c, "prometheus")
+	wgBg.Add(1)
+	go func() {
+		defer wgBg.Done()
+		discoveryManager.ApplyConfig(c)
+	}()
+	mgrCancel()
+	wgDiscovery.Wait()
+
+	// Stop the sync channel reader, then wait for it and for the background
+	// ApplyConfig call; if ApplyConfig never returns, this Wait blocks.
+	cancel()
+	wgBg.Wait()
+}