diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 0e9d7c1d78..e513926da6 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -518,6 +518,9 @@ func main() {
serverOnlyFlag(a, "alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications.").
Default("10000").IntVar(&cfg.notifier.QueueCapacity)
+ serverOnlyFlag(a, "alertmanager.notification-batch-size", "The maximum number of notifications per batch to send to the Alertmanager.").
+ Default(strconv.Itoa(notifier.DefaultMaxBatchSize)).IntVar(&cfg.notifier.MaxBatchSize)
+
serverOnlyFlag(a, "alertmanager.drain-notification-queue-on-shutdown", "Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down.").
Default("true").BoolVar(&cfg.notifier.DrainOnShutdown)
diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md
index 0f58ff4b18..5124255316 100644
--- a/docs/command-line/prometheus.md
+++ b/docs/command-line/prometheus.md
@@ -55,6 +55,7 @@ The Prometheus monitoring server
| --rules.alert.resend-delay | Minimum amount of time to wait before resending an alert to Alertmanager. Use with server mode only. | `1m` |
| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. When set, "query.max-concurrency" may need to be adjusted accordingly. Use with server mode only. | `4` |
| --alertmanager.notification-queue-capacity | The capacity of the queue for pending Alertmanager notifications. Use with server mode only. | `10000` |
+| --alertmanager.notification-batch-size | The maximum number of notifications per batch to send to the Alertmanager. Use with server mode only. | `256` |
| --alertmanager.drain-notification-queue-on-shutdown | Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down. Use with server mode only. | `true` |
| --query.lookback-delta | The maximum lookback duration for retrieving metrics during expression evaluations and federation. Use with server mode only. | `5m` |
| --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
diff --git a/notifier/notifier.go b/notifier/notifier.go
index 153c1039f8..e237a5e1c8 100644
--- a/notifier/notifier.go
+++ b/notifier/notifier.go
@@ -45,6 +45,9 @@ import (
)
const (
+ // DefaultMaxBatchSize is the default maximum number of alerts to send in a single request to the alertmanager.
+ DefaultMaxBatchSize = 256
+
contentTypeJSON = "application/json"
)
@@ -132,6 +135,9 @@ type Options struct {
Do func(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error)
Registerer prometheus.Registerer
+
+ // MaxBatchSize determines the maximum number of alerts to send in a single request to the alertmanager.
+ MaxBatchSize int
}
type alertMetrics struct {
@@ -224,6 +230,10 @@ func NewManager(o *Options, logger *slog.Logger) *Manager {
if o.Do == nil {
o.Do = do
}
+ // Set default MaxBatchSize if not provided.
+ if o.MaxBatchSize <= 0 {
+ o.MaxBatchSize = DefaultMaxBatchSize
+ }
if logger == nil {
logger = promslog.NewNopLogger()
}
@@ -294,8 +304,6 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
return nil
}
-const maxBatchSize = 64
-
func (n *Manager) queueLen() int {
n.mtx.RLock()
defer n.mtx.RUnlock()
@@ -309,7 +317,7 @@ func (n *Manager) nextBatch() []*Alert {
var alerts []*Alert
- if len(n.queue) > maxBatchSize {
+ if maxBatchSize := n.opts.MaxBatchSize; len(n.queue) > maxBatchSize {
alerts = append(make([]*Alert, 0, maxBatchSize), n.queue[:maxBatchSize]...)
n.queue = n.queue[maxBatchSize:]
} else {
diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go
index d9ba1ca9ab..0ecd64ae1d 100644
--- a/notifier/notifier_test.go
+++ b/notifier/notifier_test.go
@@ -43,6 +43,8 @@ import (
"github.com/prometheus/prometheus/model/relabel"
)
+const maxBatchSize = 256
+
func TestPostPath(t *testing.T) {
cases := []struct {
in, out string
@@ -413,6 +415,7 @@ func TestCustomDo(t *testing.T) {
func TestExternalLabels(t *testing.T) {
h := NewManager(&Options{
QueueCapacity: 3 * maxBatchSize,
+ MaxBatchSize: maxBatchSize,
ExternalLabels: labels.FromStrings("a", "b"),
RelabelConfigs: []*relabel.Config{
{
@@ -447,6 +450,7 @@ func TestExternalLabels(t *testing.T) {
func TestHandlerRelabel(t *testing.T) {
h := NewManager(&Options{
QueueCapacity: 3 * maxBatchSize,
+ MaxBatchSize: maxBatchSize,
RelabelConfigs: []*relabel.Config{
{
SourceLabels: model.LabelNames{"alertname"},
@@ -525,6 +529,7 @@ func TestHandlerQueuing(t *testing.T) {
h := NewManager(
&Options{
QueueCapacity: 3 * maxBatchSize,
+ MaxBatchSize: maxBatchSize,
},
nil,
)