mirror of
https://github.com/prometheus/prometheus.git
synced 2025-08-05 13:47:10 +02:00
chore: refactor notifier package
Split the notifier package into smaller source files. Signed-off-by: Siavash Safi <siavash@cloudflare.com>
This commit is contained in:
parent
8ad21d0659
commit
ef48e4cb9f
91
notifier/alert.go
Normal file
91
notifier/alert.go
Normal file
@ -0,0 +1,91 @@
|
||||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/model/relabel"
|
||||
)
|
||||
|
||||
// Alert is a generic representation of an alert in the Prometheus eco-system.
|
||||
type Alert struct {
|
||||
// Label value pairs for purpose of aggregation, matching, and disposition
|
||||
// dispatching. This must minimally include an "alertname" label.
|
||||
Labels labels.Labels `json:"labels"`
|
||||
|
||||
// Extra key/value information which does not define alert identity.
|
||||
Annotations labels.Labels `json:"annotations"`
|
||||
|
||||
// The known time range for this alert. Both ends are optional.
|
||||
StartsAt time.Time `json:"startsAt,omitempty"`
|
||||
EndsAt time.Time `json:"endsAt,omitempty"`
|
||||
GeneratorURL string `json:"generatorURL,omitempty"`
|
||||
}
|
||||
|
||||
// Name returns the name of the alert. It is equivalent to the "alertname" label.
|
||||
func (a *Alert) Name() string {
|
||||
return a.Labels.Get(labels.AlertName)
|
||||
}
|
||||
|
||||
// Hash returns a hash over the alert. It is equivalent to the alert labels hash.
|
||||
func (a *Alert) Hash() uint64 {
|
||||
return a.Labels.Hash()
|
||||
}
|
||||
|
||||
func (a *Alert) String() string {
|
||||
s := fmt.Sprintf("%s[%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7])
|
||||
if a.Resolved() {
|
||||
return s + "[resolved]"
|
||||
}
|
||||
return s + "[active]"
|
||||
}
|
||||
|
||||
// Resolved returns true iff the activity interval ended in the past.
|
||||
func (a *Alert) Resolved() bool {
|
||||
return a.ResolvedAt(time.Now())
|
||||
}
|
||||
|
||||
// ResolvedAt returns true iff the activity interval ended before
|
||||
// the given timestamp.
|
||||
func (a *Alert) ResolvedAt(ts time.Time) bool {
|
||||
if a.EndsAt.IsZero() {
|
||||
return false
|
||||
}
|
||||
return !a.EndsAt.After(ts)
|
||||
}
|
||||
|
||||
func relabelAlerts(relabelConfigs []*relabel.Config, externalLabels labels.Labels, alerts []*Alert) []*Alert {
|
||||
lb := labels.NewBuilder(labels.EmptyLabels())
|
||||
var relabeledAlerts []*Alert
|
||||
|
||||
for _, a := range alerts {
|
||||
lb.Reset(a.Labels)
|
||||
externalLabels.Range(func(l labels.Label) {
|
||||
if a.Labels.Get(l.Name) == "" {
|
||||
lb.Set(l.Name, l.Value)
|
||||
}
|
||||
})
|
||||
|
||||
keep := relabel.ProcessBuilder(lb, relabelConfigs...)
|
||||
if !keep {
|
||||
continue
|
||||
}
|
||||
a.Labels = lb.Labels()
|
||||
relabeledAlerts = append(relabeledAlerts, a)
|
||||
}
|
||||
return relabeledAlerts
|
||||
}
|
90
notifier/alertmanager.go
Normal file
90
notifier/alertmanager.go
Normal file
@ -0,0 +1,90 @@
|
||||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"path"
|
||||
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/discovery/targetgroup"
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/model/relabel"
|
||||
)
|
||||
|
||||
// alertmanager is implemented by anything that can report the URL of an
// Alertmanager endpoint.
type alertmanager interface {
	// url returns the endpoint URL for this Alertmanager.
	url() *url.URL
}
|
||||
|
||||
type alertmanagerLabels struct{ labels.Labels }
|
||||
|
||||
const pathLabel = "__alerts_path__"
|
||||
|
||||
func (a alertmanagerLabels) url() *url.URL {
|
||||
return &url.URL{
|
||||
Scheme: a.Get(model.SchemeLabel),
|
||||
Host: a.Get(model.AddressLabel),
|
||||
Path: a.Get(pathLabel),
|
||||
}
|
||||
}
|
||||
|
||||
// AlertmanagerFromGroup extracts a list of alertmanagers from a target group
|
||||
// and an associated AlertmanagerConfig.
|
||||
func AlertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig) ([]alertmanager, []alertmanager, error) {
|
||||
var res []alertmanager
|
||||
var droppedAlertManagers []alertmanager
|
||||
lb := labels.NewBuilder(labels.EmptyLabels())
|
||||
|
||||
for _, tlset := range tg.Targets {
|
||||
lb.Reset(labels.EmptyLabels())
|
||||
|
||||
for ln, lv := range tlset {
|
||||
lb.Set(string(ln), string(lv))
|
||||
}
|
||||
// Set configured scheme as the initial scheme label for overwrite.
|
||||
lb.Set(model.SchemeLabel, cfg.Scheme)
|
||||
lb.Set(pathLabel, postPath(cfg.PathPrefix, cfg.APIVersion))
|
||||
|
||||
// Combine target labels with target group labels.
|
||||
for ln, lv := range tg.Labels {
|
||||
if _, ok := tlset[ln]; !ok {
|
||||
lb.Set(string(ln), string(lv))
|
||||
}
|
||||
}
|
||||
|
||||
preRelabel := lb.Labels()
|
||||
keep := relabel.ProcessBuilder(lb, cfg.RelabelConfigs...)
|
||||
if !keep {
|
||||
droppedAlertManagers = append(droppedAlertManagers, alertmanagerLabels{preRelabel})
|
||||
continue
|
||||
}
|
||||
|
||||
addr := lb.Get(model.AddressLabel)
|
||||
if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
res = append(res, alertmanagerLabels{lb.Labels()})
|
||||
}
|
||||
return res, droppedAlertManagers, nil
|
||||
}
|
||||
|
||||
func postPath(pre string, v config.AlertmanagerAPIVersion) string {
|
||||
alertPushEndpoint := fmt.Sprintf("/api/%v/alerts", string(v))
|
||||
return path.Join("/", pre, alertPushEndpoint)
|
||||
}
|
62
notifier/alertmanager_test.go
Normal file
62
notifier/alertmanager_test.go
Normal file
@ -0,0 +1,62 @@
|
||||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/prometheus/prometheus/config"
|
||||
)
|
||||
|
||||
func TestPostPath(t *testing.T) {
|
||||
cases := []struct {
|
||||
in, out string
|
||||
}{
|
||||
{
|
||||
in: "",
|
||||
out: "/api/v2/alerts",
|
||||
},
|
||||
{
|
||||
in: "/",
|
||||
out: "/api/v2/alerts",
|
||||
},
|
||||
{
|
||||
in: "/prefix",
|
||||
out: "/prefix/api/v2/alerts",
|
||||
},
|
||||
{
|
||||
in: "/prefix//",
|
||||
out: "/prefix/api/v2/alerts",
|
||||
},
|
||||
{
|
||||
in: "prefix//",
|
||||
out: "/prefix/api/v2/alerts",
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
require.Equal(t, c.out, postPath(c.in, config.AlertmanagerAPIVersionV2))
|
||||
}
|
||||
}
|
||||
|
||||
func TestLabelSetNotReused(t *testing.T) {
|
||||
tg := makeInputTargetGroup()
|
||||
_, _, err := AlertmanagerFromGroup(tg, &config.AlertmanagerConfig{})
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
// Target modified during alertmanager extraction
|
||||
require.Equal(t, tg, makeInputTargetGroup())
|
||||
}
|
128
notifier/alertmanagerset.go
Normal file
128
notifier/alertmanagerset.go
Normal file
@ -0,0 +1,128 @@
|
||||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"sync"
|
||||
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/sigv4"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/discovery/targetgroup"
|
||||
)
|
||||
|
||||
// alertmanagerSet contains a set of Alertmanagers discovered via a group of service
|
||||
// discovery definitions that have a common configuration on how alerts should be sent.
|
||||
type alertmanagerSet struct {
|
||||
cfg *config.AlertmanagerConfig
|
||||
client *http.Client
|
||||
|
||||
metrics *alertMetrics
|
||||
|
||||
mtx sync.RWMutex
|
||||
ams []alertmanager
|
||||
droppedAms []alertmanager
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger *slog.Logger, metrics *alertMetrics) (*alertmanagerSet, error) {
|
||||
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "alertmanager")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t := client.Transport
|
||||
|
||||
if cfg.SigV4Config != nil {
|
||||
t, err = sigv4.NewSigV4RoundTripper(cfg.SigV4Config, client.Transport)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
client.Transport = t
|
||||
|
||||
s := &alertmanagerSet{
|
||||
client: client,
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
metrics: metrics,
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// sync extracts a deduplicated set of Alertmanager endpoints from a list
|
||||
// of target groups definitions.
|
||||
func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
|
||||
allAms := []alertmanager{}
|
||||
allDroppedAms := []alertmanager{}
|
||||
|
||||
for _, tg := range tgs {
|
||||
ams, droppedAms, err := AlertmanagerFromGroup(tg, s.cfg)
|
||||
if err != nil {
|
||||
s.logger.Error("Creating discovered Alertmanagers failed", "err", err)
|
||||
continue
|
||||
}
|
||||
allAms = append(allAms, ams...)
|
||||
allDroppedAms = append(allDroppedAms, droppedAms...)
|
||||
}
|
||||
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
previousAms := s.ams
|
||||
// Set new Alertmanagers and deduplicate them along their unique URL.
|
||||
s.ams = []alertmanager{}
|
||||
s.droppedAms = []alertmanager{}
|
||||
s.droppedAms = append(s.droppedAms, allDroppedAms...)
|
||||
seen := map[string]struct{}{}
|
||||
|
||||
for _, am := range allAms {
|
||||
us := am.url().String()
|
||||
if _, ok := seen[us]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// This will initialize the Counters for the AM to 0.
|
||||
s.metrics.sent.WithLabelValues(us)
|
||||
s.metrics.errors.WithLabelValues(us)
|
||||
|
||||
seen[us] = struct{}{}
|
||||
s.ams = append(s.ams, am)
|
||||
}
|
||||
// Now remove counters for any removed Alertmanagers.
|
||||
for _, am := range previousAms {
|
||||
us := am.url().String()
|
||||
if _, ok := seen[us]; ok {
|
||||
continue
|
||||
}
|
||||
s.metrics.latency.DeleteLabelValues(us)
|
||||
s.metrics.sent.DeleteLabelValues(us)
|
||||
s.metrics.errors.DeleteLabelValues(us)
|
||||
seen[us] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *alertmanagerSet) configHash() (string, error) {
|
||||
b, err := yaml.Marshal(s.cfg)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
hash := md5.Sum(b)
|
||||
return hex.EncodeToString(hash[:]), nil
|
||||
}
|
@ -16,27 +16,18 @@ package notifier
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-openapi/strfmt"
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/common/promslog"
|
||||
"github.com/prometheus/common/version"
|
||||
"github.com/prometheus/sigv4"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/discovery/targetgroup"
|
||||
@ -60,53 +51,6 @@ const (
|
||||
|
||||
var userAgent = version.PrometheusUserAgent()
|
||||
|
||||
// Alert is a generic representation of an alert in the Prometheus eco-system.
|
||||
type Alert struct {
|
||||
// Label value pairs for purpose of aggregation, matching, and disposition
|
||||
// dispatching. This must minimally include an "alertname" label.
|
||||
Labels labels.Labels `json:"labels"`
|
||||
|
||||
// Extra key/value information which does not define alert identity.
|
||||
Annotations labels.Labels `json:"annotations"`
|
||||
|
||||
// The known time range for this alert. Both ends are optional.
|
||||
StartsAt time.Time `json:"startsAt,omitempty"`
|
||||
EndsAt time.Time `json:"endsAt,omitempty"`
|
||||
GeneratorURL string `json:"generatorURL,omitempty"`
|
||||
}
|
||||
|
||||
// Name returns the name of the alert. It is equivalent to the "alertname" label.
|
||||
func (a *Alert) Name() string {
|
||||
return a.Labels.Get(labels.AlertName)
|
||||
}
|
||||
|
||||
// Hash returns a hash over the alert. It is equivalent to the alert labels hash.
|
||||
func (a *Alert) Hash() uint64 {
|
||||
return a.Labels.Hash()
|
||||
}
|
||||
|
||||
func (a *Alert) String() string {
|
||||
s := fmt.Sprintf("%s[%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7])
|
||||
if a.Resolved() {
|
||||
return s + "[resolved]"
|
||||
}
|
||||
return s + "[active]"
|
||||
}
|
||||
|
||||
// Resolved returns true iff the activity interval ended in the past.
|
||||
func (a *Alert) Resolved() bool {
|
||||
return a.ResolvedAt(time.Now())
|
||||
}
|
||||
|
||||
// ResolvedAt returns true iff the activity interval ended before
|
||||
// the given timestamp.
|
||||
func (a *Alert) ResolvedAt(ts time.Time) bool {
|
||||
if a.EndsAt.IsZero() {
|
||||
return false
|
||||
}
|
||||
return !a.EndsAt.After(ts)
|
||||
}
|
||||
|
||||
// Manager is responsible for dispatching alert notifications to an
|
||||
// alert manager service.
|
||||
type Manager struct {
|
||||
@ -140,84 +84,6 @@ type Options struct {
|
||||
MaxBatchSize int
|
||||
}
|
||||
|
||||
type alertMetrics struct {
|
||||
latency *prometheus.SummaryVec
|
||||
errors *prometheus.CounterVec
|
||||
sent *prometheus.CounterVec
|
||||
dropped prometheus.Counter
|
||||
queueLength prometheus.GaugeFunc
|
||||
queueCapacity prometheus.Gauge
|
||||
alertmanagersDiscovered prometheus.GaugeFunc
|
||||
}
|
||||
|
||||
func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanagersDiscovered func() float64) *alertMetrics {
|
||||
m := &alertMetrics{
|
||||
latency: prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "latency_seconds",
|
||||
Help: "Latency quantiles for sending alert notifications.",
|
||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
||||
},
|
||||
[]string{alertmanagerLabel},
|
||||
),
|
||||
errors: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "errors_total",
|
||||
Help: "Total number of sent alerts affected by errors.",
|
||||
},
|
||||
[]string{alertmanagerLabel},
|
||||
),
|
||||
sent: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "sent_total",
|
||||
Help: "Total number of alerts sent.",
|
||||
},
|
||||
[]string{alertmanagerLabel},
|
||||
),
|
||||
dropped: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "dropped_total",
|
||||
Help: "Total number of alerts dropped due to errors when sending to Alertmanager.",
|
||||
}),
|
||||
queueLength: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "queue_length",
|
||||
Help: "The number of alert notifications in the queue.",
|
||||
}, queueLen),
|
||||
queueCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "queue_capacity",
|
||||
Help: "The capacity of the alert notifications queue.",
|
||||
}),
|
||||
alertmanagersDiscovered: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
|
||||
Name: "prometheus_notifications_alertmanagers_discovered",
|
||||
Help: "The number of alertmanagers discovered and active.",
|
||||
}, alertmanagersDiscovered),
|
||||
}
|
||||
|
||||
m.queueCapacity.Set(float64(queueCap))
|
||||
|
||||
if r != nil {
|
||||
r.MustRegister(
|
||||
m.latency,
|
||||
m.errors,
|
||||
m.sent,
|
||||
m.dropped,
|
||||
m.queueLength,
|
||||
m.queueCapacity,
|
||||
m.alertmanagersDiscovered,
|
||||
)
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func do(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) {
|
||||
if client == nil {
|
||||
client = http.DefaultClient
|
||||
@ -470,28 +336,6 @@ func (n *Manager) Send(alerts ...*Alert) {
|
||||
n.setMore()
|
||||
}
|
||||
|
||||
func relabelAlerts(relabelConfigs []*relabel.Config, externalLabels labels.Labels, alerts []*Alert) []*Alert {
|
||||
lb := labels.NewBuilder(labels.EmptyLabels())
|
||||
var relabeledAlerts []*Alert
|
||||
|
||||
for _, a := range alerts {
|
||||
lb.Reset(a.Labels)
|
||||
externalLabels.Range(func(l labels.Label) {
|
||||
if a.Labels.Get(l.Name) == "" {
|
||||
lb.Set(l.Name, l.Value)
|
||||
}
|
||||
})
|
||||
|
||||
keep := relabel.ProcessBuilder(lb, relabelConfigs...)
|
||||
if !keep {
|
||||
continue
|
||||
}
|
||||
a.Labels = lb.Labels()
|
||||
relabeledAlerts = append(relabeledAlerts, a)
|
||||
}
|
||||
return relabeledAlerts
|
||||
}
|
||||
|
||||
// setMore signals that the alert queue has items.
|
||||
func (n *Manager) setMore() {
|
||||
// If we cannot send on the channel, it means the signal already exists
|
||||
@ -661,34 +505,6 @@ func (n *Manager) sendAll(alerts ...*Alert) bool {
|
||||
return allAmSetsCovered
|
||||
}
|
||||
|
||||
func alertsToOpenAPIAlerts(alerts []*Alert) models.PostableAlerts {
|
||||
openAPIAlerts := models.PostableAlerts{}
|
||||
for _, a := range alerts {
|
||||
start := strfmt.DateTime(a.StartsAt)
|
||||
end := strfmt.DateTime(a.EndsAt)
|
||||
openAPIAlerts = append(openAPIAlerts, &models.PostableAlert{
|
||||
Annotations: labelsToOpenAPILabelSet(a.Annotations),
|
||||
EndsAt: end,
|
||||
StartsAt: start,
|
||||
Alert: models.Alert{
|
||||
GeneratorURL: strfmt.URI(a.GeneratorURL),
|
||||
Labels: labelsToOpenAPILabelSet(a.Labels),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return openAPIAlerts
|
||||
}
|
||||
|
||||
func labelsToOpenAPILabelSet(modelLabelSet labels.Labels) models.LabelSet {
|
||||
apiLabelSet := models.LabelSet{}
|
||||
modelLabelSet.Range(func(label labels.Label) {
|
||||
apiLabelSet[label.Name] = label.Value
|
||||
})
|
||||
|
||||
return apiLabelSet
|
||||
}
|
||||
|
||||
func (n *Manager) sendOne(ctx context.Context, c *http.Client, url string, b []byte) error {
|
||||
req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(b))
|
||||
if err != nil {
|
||||
@ -727,165 +543,3 @@ func (n *Manager) Stop() {
|
||||
close(n.stopRequested)
|
||||
})
|
||||
}
|
||||
|
||||
// alertmanager is implemented by anything that can report the URL of an
// Alertmanager endpoint.
type alertmanager interface {
	// url returns the endpoint URL for this Alertmanager.
	url() *url.URL
}
|
||||
|
||||
type alertmanagerLabels struct{ labels.Labels }
|
||||
|
||||
const pathLabel = "__alerts_path__"
|
||||
|
||||
func (a alertmanagerLabels) url() *url.URL {
|
||||
return &url.URL{
|
||||
Scheme: a.Get(model.SchemeLabel),
|
||||
Host: a.Get(model.AddressLabel),
|
||||
Path: a.Get(pathLabel),
|
||||
}
|
||||
}
|
||||
|
||||
// alertmanagerSet contains a set of Alertmanagers discovered via a group of service
|
||||
// discovery definitions that have a common configuration on how alerts should be sent.
|
||||
type alertmanagerSet struct {
|
||||
cfg *config.AlertmanagerConfig
|
||||
client *http.Client
|
||||
|
||||
metrics *alertMetrics
|
||||
|
||||
mtx sync.RWMutex
|
||||
ams []alertmanager
|
||||
droppedAms []alertmanager
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger *slog.Logger, metrics *alertMetrics) (*alertmanagerSet, error) {
|
||||
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "alertmanager")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t := client.Transport
|
||||
|
||||
if cfg.SigV4Config != nil {
|
||||
t, err = sigv4.NewSigV4RoundTripper(cfg.SigV4Config, client.Transport)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
client.Transport = t
|
||||
|
||||
s := &alertmanagerSet{
|
||||
client: client,
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
metrics: metrics,
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// sync extracts a deduplicated set of Alertmanager endpoints from a list
|
||||
// of target groups definitions.
|
||||
func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
|
||||
allAms := []alertmanager{}
|
||||
allDroppedAms := []alertmanager{}
|
||||
|
||||
for _, tg := range tgs {
|
||||
ams, droppedAms, err := AlertmanagerFromGroup(tg, s.cfg)
|
||||
if err != nil {
|
||||
s.logger.Error("Creating discovered Alertmanagers failed", "err", err)
|
||||
continue
|
||||
}
|
||||
allAms = append(allAms, ams...)
|
||||
allDroppedAms = append(allDroppedAms, droppedAms...)
|
||||
}
|
||||
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
previousAms := s.ams
|
||||
// Set new Alertmanagers and deduplicate them along their unique URL.
|
||||
s.ams = []alertmanager{}
|
||||
s.droppedAms = []alertmanager{}
|
||||
s.droppedAms = append(s.droppedAms, allDroppedAms...)
|
||||
seen := map[string]struct{}{}
|
||||
|
||||
for _, am := range allAms {
|
||||
us := am.url().String()
|
||||
if _, ok := seen[us]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// This will initialize the Counters for the AM to 0.
|
||||
s.metrics.sent.WithLabelValues(us)
|
||||
s.metrics.errors.WithLabelValues(us)
|
||||
|
||||
seen[us] = struct{}{}
|
||||
s.ams = append(s.ams, am)
|
||||
}
|
||||
// Now remove counters for any removed Alertmanagers.
|
||||
for _, am := range previousAms {
|
||||
us := am.url().String()
|
||||
if _, ok := seen[us]; ok {
|
||||
continue
|
||||
}
|
||||
s.metrics.latency.DeleteLabelValues(us)
|
||||
s.metrics.sent.DeleteLabelValues(us)
|
||||
s.metrics.errors.DeleteLabelValues(us)
|
||||
seen[us] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *alertmanagerSet) configHash() (string, error) {
|
||||
b, err := yaml.Marshal(s.cfg)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
hash := md5.Sum(b)
|
||||
return hex.EncodeToString(hash[:]), nil
|
||||
}
|
||||
|
||||
func postPath(pre string, v config.AlertmanagerAPIVersion) string {
|
||||
alertPushEndpoint := fmt.Sprintf("/api/%v/alerts", string(v))
|
||||
return path.Join("/", pre, alertPushEndpoint)
|
||||
}
|
||||
|
||||
// AlertmanagerFromGroup extracts a list of alertmanagers from a target group
|
||||
// and an associated AlertmanagerConfig.
|
||||
func AlertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig) ([]alertmanager, []alertmanager, error) {
|
||||
var res []alertmanager
|
||||
var droppedAlertManagers []alertmanager
|
||||
lb := labels.NewBuilder(labels.EmptyLabels())
|
||||
|
||||
for _, tlset := range tg.Targets {
|
||||
lb.Reset(labels.EmptyLabels())
|
||||
|
||||
for ln, lv := range tlset {
|
||||
lb.Set(string(ln), string(lv))
|
||||
}
|
||||
// Set configured scheme as the initial scheme label for overwrite.
|
||||
lb.Set(model.SchemeLabel, cfg.Scheme)
|
||||
lb.Set(pathLabel, postPath(cfg.PathPrefix, cfg.APIVersion))
|
||||
|
||||
// Combine target labels with target group labels.
|
||||
for ln, lv := range tg.Labels {
|
||||
if _, ok := tlset[ln]; !ok {
|
||||
lb.Set(string(ln), string(lv))
|
||||
}
|
||||
}
|
||||
|
||||
preRelabel := lb.Labels()
|
||||
keep := relabel.ProcessBuilder(lb, cfg.RelabelConfigs...)
|
||||
if !keep {
|
||||
droppedAlertManagers = append(droppedAlertManagers, alertmanagerLabels{preRelabel})
|
||||
continue
|
||||
}
|
||||
|
||||
addr := lb.Get(model.AddressLabel)
|
||||
if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
res = append(res, alertmanagerLabels{lb.Labels()})
|
||||
}
|
||||
return res, droppedAlertManagers, nil
|
||||
}
|
@ -26,7 +26,6 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
@ -45,36 +44,6 @@ import (
|
||||
|
||||
const maxBatchSize = 256
|
||||
|
||||
func TestPostPath(t *testing.T) {
|
||||
cases := []struct {
|
||||
in, out string
|
||||
}{
|
||||
{
|
||||
in: "",
|
||||
out: "/api/v2/alerts",
|
||||
},
|
||||
{
|
||||
in: "/",
|
||||
out: "/api/v2/alerts",
|
||||
},
|
||||
{
|
||||
in: "/prefix",
|
||||
out: "/prefix/api/v2/alerts",
|
||||
},
|
||||
{
|
||||
in: "/prefix//",
|
||||
out: "/prefix/api/v2/alerts",
|
||||
},
|
||||
{
|
||||
in: "prefix//",
|
||||
out: "/prefix/api/v2/alerts",
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
require.Equal(t, c.out, postPath(c.in, config.AlertmanagerAPIVersionV2))
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandlerNextBatch(t *testing.T) {
|
||||
h := NewManager(&Options{}, nil)
|
||||
|
||||
@ -620,16 +589,6 @@ func (a alertmanagerMock) url() *url.URL {
|
||||
return u
|
||||
}
|
||||
|
||||
func TestLabelSetNotReused(t *testing.T) {
|
||||
tg := makeInputTargetGroup()
|
||||
_, _, err := AlertmanagerFromGroup(tg, &config.AlertmanagerConfig{})
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
// Target modified during alertmanager extraction
|
||||
require.Equal(t, tg, makeInputTargetGroup())
|
||||
}
|
||||
|
||||
func TestReload(t *testing.T) {
|
||||
tests := []struct {
|
||||
in *targetgroup.Group
|
||||
@ -744,10 +703,6 @@ func makeInputTargetGroup() *targetgroup.Group {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLabelsToOpenAPILabelSet(t *testing.T) {
|
||||
require.Equal(t, models.LabelSet{"aaa": "111", "bbb": "222"}, labelsToOpenAPILabelSet(labels.FromStrings("aaa", "111", "bbb", "222")))
|
||||
}
|
||||
|
||||
// TestHangingNotifier ensures that the notifier takes into account SD changes even when there are
|
||||
// queued alerts. This test reproduces the issue described in https://github.com/prometheus/prometheus/issues/13676.
|
||||
// and https://github.com/prometheus/prometheus/issues/8768.
|
94
notifier/metric.go
Normal file
94
notifier/metric.go
Normal file
@ -0,0 +1,94 @@
|
||||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package notifier
|
||||
|
||||
import "github.com/prometheus/client_golang/prometheus"
|
||||
|
||||
type alertMetrics struct {
|
||||
latency *prometheus.SummaryVec
|
||||
errors *prometheus.CounterVec
|
||||
sent *prometheus.CounterVec
|
||||
dropped prometheus.Counter
|
||||
queueLength prometheus.GaugeFunc
|
||||
queueCapacity prometheus.Gauge
|
||||
alertmanagersDiscovered prometheus.GaugeFunc
|
||||
}
|
||||
|
||||
func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanagersDiscovered func() float64) *alertMetrics {
|
||||
m := &alertMetrics{
|
||||
latency: prometheus.NewSummaryVec(prometheus.SummaryOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "latency_seconds",
|
||||
Help: "Latency quantiles for sending alert notifications.",
|
||||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
||||
},
|
||||
[]string{alertmanagerLabel},
|
||||
),
|
||||
errors: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "errors_total",
|
||||
Help: "Total number of sent alerts affected by errors.",
|
||||
},
|
||||
[]string{alertmanagerLabel},
|
||||
),
|
||||
sent: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "sent_total",
|
||||
Help: "Total number of alerts sent.",
|
||||
},
|
||||
[]string{alertmanagerLabel},
|
||||
),
|
||||
dropped: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "dropped_total",
|
||||
Help: "Total number of alerts dropped due to errors when sending to Alertmanager.",
|
||||
}),
|
||||
queueLength: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "queue_length",
|
||||
Help: "The number of alert notifications in the queue.",
|
||||
}, queueLen),
|
||||
queueCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "queue_capacity",
|
||||
Help: "The capacity of the alert notifications queue.",
|
||||
}),
|
||||
alertmanagersDiscovered: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
|
||||
Name: "prometheus_notifications_alertmanagers_discovered",
|
||||
Help: "The number of alertmanagers discovered and active.",
|
||||
}, alertmanagersDiscovered),
|
||||
}
|
||||
|
||||
m.queueCapacity.Set(float64(queueCap))
|
||||
|
||||
if r != nil {
|
||||
r.MustRegister(
|
||||
m.latency,
|
||||
m.errors,
|
||||
m.sent,
|
||||
m.dropped,
|
||||
m.queueLength,
|
||||
m.queueCapacity,
|
||||
m.alertmanagersDiscovered,
|
||||
)
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
49
notifier/util.go
Normal file
49
notifier/util.go
Normal file
@ -0,0 +1,49 @@
|
||||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"github.com/go-openapi/strfmt"
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
)
|
||||
|
||||
func alertsToOpenAPIAlerts(alerts []*Alert) models.PostableAlerts {
|
||||
openAPIAlerts := models.PostableAlerts{}
|
||||
for _, a := range alerts {
|
||||
start := strfmt.DateTime(a.StartsAt)
|
||||
end := strfmt.DateTime(a.EndsAt)
|
||||
openAPIAlerts = append(openAPIAlerts, &models.PostableAlert{
|
||||
Annotations: labelsToOpenAPILabelSet(a.Annotations),
|
||||
EndsAt: end,
|
||||
StartsAt: start,
|
||||
Alert: models.Alert{
|
||||
GeneratorURL: strfmt.URI(a.GeneratorURL),
|
||||
Labels: labelsToOpenAPILabelSet(a.Labels),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return openAPIAlerts
|
||||
}
|
||||
|
||||
func labelsToOpenAPILabelSet(modelLabelSet labels.Labels) models.LabelSet {
|
||||
apiLabelSet := models.LabelSet{}
|
||||
modelLabelSet.Range(func(label labels.Label) {
|
||||
apiLabelSet[label.Name] = label.Value
|
||||
})
|
||||
|
||||
return apiLabelSet
|
||||
}
|
27
notifier/util_test.go
Normal file
27
notifier/util_test.go
Normal file
@ -0,0 +1,27 @@
|
||||
// Copyright 2013 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
)
|
||||
|
||||
func TestLabelsToOpenAPILabelSet(t *testing.T) {
|
||||
require.Equal(t, models.LabelSet{"aaa": "111", "bbb": "222"}, labelsToOpenAPILabelSet(labels.FromStrings("aaa", "111", "bbb", "222")))
|
||||
}
|
Loading…
Reference in New Issue
Block a user