diff --git a/notifier/alertmanagerset.go b/notifier/alertmanagerset.go index c47c9ea23a..b6d1b8c4aa 100644 --- a/notifier/alertmanagerset.go +++ b/notifier/alertmanagerset.go @@ -111,7 +111,8 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { if _, ok := seen[us]; ok { continue } - s.metrics.latency.DeleteLabelValues(us) + s.metrics.latencySummary.DeleteLabelValues(us) + s.metrics.latencyHistogram.DeleteLabelValues(us) s.metrics.sent.DeleteLabelValues(us) s.metrics.errors.DeleteLabelValues(us) seen[us] = struct{}{} diff --git a/notifier/manager.go b/notifier/manager.go index 65adfd5c3e..e37f59a250 100644 --- a/notifier/manager.go +++ b/notifier/manager.go @@ -498,7 +498,9 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { amSetCovered.CompareAndSwap(k, false, true) } - n.metrics.latency.WithLabelValues(url).Observe(time.Since(begin).Seconds()) + durationSeconds := time.Since(begin).Seconds() + n.metrics.latencySummary.WithLabelValues(url).Observe(durationSeconds) + n.metrics.latencyHistogram.WithLabelValues(url).Observe(durationSeconds) n.metrics.sent.WithLabelValues(url).Add(float64(count)) wg.Done() diff --git a/notifier/metric.go b/notifier/metric.go index b9a55b3ec7..3f4abdda93 100644 --- a/notifier/metric.go +++ b/notifier/metric.go @@ -13,10 +13,15 @@ package notifier -import "github.com/prometheus/client_golang/prometheus" +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" +) type alertMetrics struct { - latency *prometheus.SummaryVec + latencySummary *prometheus.SummaryVec + latencyHistogram *prometheus.HistogramVec errors *prometheus.CounterVec sent *prometheus.CounterVec dropped prometheus.Counter @@ -25,9 +30,13 @@ type alertMetrics struct { alertmanagersDiscovered prometheus.GaugeFunc } -func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanagersDiscovered func() float64) *alertMetrics { +func newAlertMetrics( + r prometheus.Registerer, + queueCap int, + queueLen, alertmanagersDiscovered func() float64, +) *alertMetrics { m := &alertMetrics{ - latency: prometheus.NewSummaryVec(prometheus.SummaryOpts{ + latencySummary: prometheus.NewSummaryVec(prometheus.SummaryOpts{ Namespace: namespace, Subsystem: subsystem, Name: "latency_seconds", @@ -36,6 +45,19 @@ func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanag }, []string{alertmanagerLabel}, ), + latencyHistogram: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "latency_histogram_seconds", + Help: "Latency histogram for sending alert notifications.", + + Buckets: []float64{.01, .1, 1, 10}, + NativeHistogramBucketFactor: 1.1, + NativeHistogramMaxBucketNumber: 100, + NativeHistogramMinResetDuration: 1 * time.Hour, + }, + []string{alertmanagerLabel}, + ), errors: prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, @@ -80,7 +102,8 @@ func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanag if r != nil { r.MustRegister( - m.latency, + m.latencySummary, + m.latencyHistogram, m.errors, m.sent, m.dropped,