diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 0e9d7c1d78..e513926da6 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -518,6 +518,9 @@ func main() { serverOnlyFlag(a, "alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications."). Default("10000").IntVar(&cfg.notifier.QueueCapacity) + serverOnlyFlag(a, "alertmanager.notification-batch-size", "The maximum number of notifications per batch to send to the Alertmanager."). + Default(strconv.Itoa(notifier.DefaultMaxBatchSize)).IntVar(&cfg.notifier.MaxBatchSize) + serverOnlyFlag(a, "alertmanager.drain-notification-queue-on-shutdown", "Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down."). Default("true").BoolVar(&cfg.notifier.DrainOnShutdown) diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 0f58ff4b18..5124255316 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -55,6 +55,7 @@ The Prometheus monitoring server | --rules.alert.resend-delay | Minimum amount of time to wait before resending an alert to Alertmanager. Use with server mode only. | `1m` | | --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. When set, "query.max-concurrency" may need to be adjusted accordingly. Use with server mode only. | `4` | | --alertmanager.notification-queue-capacity | The capacity of the queue for pending Alertmanager notifications. Use with server mode only. | `10000` | +| --alertmanager.notification-batch-size | The maximum number of notifications per batch to send to the Alertmanager. Use with server mode only. | `256` | | --alertmanager.drain-notification-queue-on-shutdown | Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down. Use with server mode only. | `true` | | --query.lookback-delta | The maximum lookback duration for retrieving metrics during expression evaluations and federation. Use with server mode only. | `5m` | | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | diff --git a/notifier/notifier.go b/notifier/notifier.go index 153c1039f8..e237a5e1c8 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -45,6 +45,9 @@ import ( ) const ( + // DefaultMaxBatchSize is the default maximum number of alerts to send in a single request to the alertmanager. + DefaultMaxBatchSize = 256 + contentTypeJSON = "application/json" ) @@ -132,6 +135,9 @@ type Options struct { Do func(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) Registerer prometheus.Registerer + + // MaxBatchSize determines the maximum number of alerts to send in a single request to the alertmanager. + MaxBatchSize int } type alertMetrics struct { @@ -224,6 +230,10 @@ func NewManager(o *Options, logger *slog.Logger) *Manager { if o.Do == nil { o.Do = do } + // Set default MaxBatchSize if not provided. + if o.MaxBatchSize <= 0 { + o.MaxBatchSize = DefaultMaxBatchSize + } if logger == nil { logger = promslog.NewNopLogger() } @@ -294,8 +304,6 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { return nil } -const maxBatchSize = 64 - func (n *Manager) queueLen() int { n.mtx.RLock() defer n.mtx.RUnlock() @@ -309,7 +317,7 @@ func (n *Manager) nextBatch() []*Alert { var alerts []*Alert - if len(n.queue) > maxBatchSize { + if maxBatchSize := n.opts.MaxBatchSize; len(n.queue) > maxBatchSize { alerts = append(make([]*Alert, 0, maxBatchSize), n.queue[:maxBatchSize]...) n.queue = n.queue[maxBatchSize:] } else { diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index d9ba1ca9ab..0ecd64ae1d 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -43,6 +43,8 @@ import ( "github.com/prometheus/prometheus/model/relabel" ) +const maxBatchSize = 256 + func TestPostPath(t *testing.T) { cases := []struct { in, out string @@ -413,6 +415,7 @@ func TestCustomDo(t *testing.T) { func TestExternalLabels(t *testing.T) { h := NewManager(&Options{ QueueCapacity: 3 * maxBatchSize, + MaxBatchSize: maxBatchSize, ExternalLabels: labels.FromStrings("a", "b"), RelabelConfigs: []*relabel.Config{ { @@ -447,6 +450,7 @@ func TestExternalLabels(t *testing.T) { func TestHandlerRelabel(t *testing.T) { h := NewManager(&Options{ QueueCapacity: 3 * maxBatchSize, + MaxBatchSize: maxBatchSize, RelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"alertname"}, @@ -525,6 +529,7 @@ func TestHandlerQueuing(t *testing.T) { h := NewManager( &Options{ QueueCapacity: 3 * maxBatchSize, + MaxBatchSize: maxBatchSize, }, nil, )