diff --git a/scrape/manager.go b/scrape/manager.go index 6cd4477494..e46c7fdf86 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -26,6 +26,7 @@ import ( config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/promslog" + "go.uber.org/atomic" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -287,29 +288,46 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { } // Cleanup and reload pool if the configuration has changed. - var failed bool - for name, sp := range m.scrapePools { - switch cfg, ok := m.scrapeConfigs[name]; { - case !ok: - sp.stop() - delete(m.scrapePools, name) - case !reflect.DeepEqual(sp.config, cfg): - err := sp.reload(cfg) - if err != nil { - m.logger.Error("error reloading scrape pool", "err", err, "scrape_pool", name) - failed = true + var ( + failed atomic.Bool + wg sync.WaitGroup + toDelete sync.Map // Stores the list of names of pools to delete. + ) + for poolName, pool := range m.scrapePools { + wg.Add(1) + cfg, ok := m.scrapeConfigs[poolName] + // Reload each scrape pool in a dedicated goroutine so we don't have to wait a long time + // if we have a lot of scrape pools to update. + go func(name string, sp *scrapePool, cfg *config.ScrapeConfig, ok bool) { + defer wg.Done() + switch { + case !ok: + sp.stop() + toDelete.Store(name, struct{}{}) + case !reflect.DeepEqual(sp.config, cfg): + err := sp.reload(cfg) + if err != nil { + m.logger.Error("error reloading scrape pool", "err", err, "scrape_pool", name) + failed.Store(true) + } + fallthrough + case ok: + if l, ok := m.scrapeFailureLoggers[cfg.ScrapeFailureLogFile]; ok { + sp.SetScrapeFailureLogger(l) + } else { + sp.logger.Error("No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", name) + } } - fallthrough - case ok: - if l, ok := m.scrapeFailureLoggers[cfg.ScrapeFailureLogFile]; ok { - sp.SetScrapeFailureLogger(l) - } else { - sp.logger.Error("No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", name) - } - } + }(poolName, pool, cfg, ok) } + wg.Wait() - if failed { + toDelete.Range(func(name, _ any) bool { + delete(m.scrapePools, name.(string)) + return true + }) + + if failed.Load() { return errors.New("failed to apply the new configuration") } return nil