Limit concurrency of scrape pool reloads (#16783)

To avoid possible overload.

As per https://github.com/prometheus/prometheus/pull/16595#issuecomment-3005027067, this changes the scrape pool manager to limit the number of scrape pools that can reload at the same time.

Signed-off-by: Lukasz Mierzwa <l.mierzwa@gmail.com>
Łukasz Mierzwa 2025-06-27 12:34:07 +01:00 committed by GitHub
parent df4f1df43f
commit 748fe6d825

@@ -19,6 +19,7 @@ import (
 	"hash/fnv"
 	"log/slog"
 	"reflect"
+	"runtime"
 	"sync"
 	"time"
@@ -293,13 +294,22 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error {
 		wg       sync.WaitGroup
 		toDelete sync.Map // Stores the list of names of pools to delete.
 	)
+	// Use a buffered channel to limit reload concurrency.
+	// Each scrape pool writes to the channel before we start to reload it and reads from it at the end.
+	// This means only N pools can be reloaded at the same time.
+	canReload := make(chan int, runtime.GOMAXPROCS(0))
 	for poolName, pool := range m.scrapePools {
+		canReload <- 1
 		wg.Add(1)
 		cfg, ok := m.scrapeConfigs[poolName]
 		// Reload each scrape pool in a dedicated goroutine so we don't have to wait a long time
 		// if we have a lot of scrape pools to update.
 		go func(name string, sp *scrapePool, cfg *config.ScrapeConfig, ok bool) {
-			defer wg.Done()
+			defer func() {
+				wg.Done()
+				<-canReload
+			}()
 			switch {
 			case !ok:
 				sp.stop()