diff --git a/docs/querying/api.md b/docs/querying/api.md index 139eb58dd7..ef94551887 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -363,6 +363,103 @@ $ curl http://localhost:9090/api/v1/targets } ``` + +## Rules + +The `/rules` API endpoint returns a list of alerting and recording rules that +are currently loaded. In addition, it returns the currently active alerts fired +by the Prometheus instance for each alerting rule. + +As the `/rules` endpoint is fairly new, it does not have the same stability +guarantees as the overarching API v1. + +``` +GET /api/v1/rules +``` + +```json +$ curl http://localhost:9090/api/v1/rules + +{ + "data": { + "groups": [ + { + "rules": [ + { + "alerts": [ + { + "activeAt": "2018-07-04T20:27:12.60602144+02:00", + "annotations": { + "summary": "High request latency" + }, + "labels": { + "alertname": "HighRequestLatency", + "severity": "page" + }, + "state": "firing", + "value": 1 + } + ], + "annotations": { + "summary": "High request latency" + }, + "duration": 600, + "labels": { + "severity": "page" + }, + "name": "HighRequestLatency", + "query": "job:request_latency_seconds:mean5m{job=\"myjob\"} > 0.5", + "type": "alerting" + }, + { + "name": "job:http_inprogress_requests:sum", + "query": "sum(http_inprogress_requests) by (job)", + "type": "recording" + } + ], + "file": "/rules.yaml", + "interval": 60, + "name": "example" + } + ] + }, + "status": "success" +} +``` + + +## Alerts + +The `/alerts` endpoint returns a list of all active alerts. + +As the `/alerts` endpoint is fairly new, it does not have the same stability +guarantees as the overarching API v1. 
+ +``` +GET /api/v1/alerts +``` + +```json +$ curl http://localhost:9090/api/v1/alerts + +{ + "data": { + "alerts": [ + { + "activeAt": "2018-07-04T20:27:12.60602144+02:00", + "annotations": {}, + "labels": { + "alertname": "my-alert" + }, + "state": "firing", + "value": 1 + } + ] + }, + "status": "success" +} +``` + ## Querying target metadata The following endpoint returns metadata about metrics currently scraped by targets. diff --git a/rules/alerting.go b/rules/alerting.go index d7c39904a3..015ae3a914 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -126,11 +126,31 @@ func NewAlertingRule(name string, vec promql.Expr, hold time.Duration, lbls, ann } } -// Name returns the name of the alert. +// Name returns the name of the alerting rule. func (r *AlertingRule) Name() string { return r.name } +// Query returns the query expression of the alerting rule. +func (r *AlertingRule) Query() promql.Expr { + return r.vector +} + +// Duration returns the hold duration of the alerting rule. +func (r *AlertingRule) Duration() time.Duration { + return r.holdDuration +} + +// Labels returns the labels of the alerting rule. +func (r *AlertingRule) Labels() labels.Labels { + return r.labels +} + +// Annotations returns the annotations of the alerting rule. +func (r *AlertingRule) Annotations() labels.Labels { + return r.annotations +} + func (r *AlertingRule) equal(o *AlertingRule) bool { return r.name == o.name && labels.Equal(r.labels, o.labels) } diff --git a/rules/manager.go b/rules/manager.go index 4477bd6a65..7ddca74328 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -188,6 +188,9 @@ func (g *Group) File() string { return g.file } // Rules returns the group's rules. func (g *Group) Rules() []Rule { return g.rules } +// Interval returns the group's interval. 
+func (g *Group) Interval() time.Duration { return g.interval } + func (g *Group) run(ctx context.Context) { defer close(g.terminated) diff --git a/rules/recording.go b/rules/recording.go index 26e7cc408b..69fdfa03e2 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -52,6 +52,16 @@ func (rule *RecordingRule) Name() string { return rule.name } +// Query returns the rule query expression. +func (rule *RecordingRule) Query() promql.Expr { + return rule.vector +} + +// Labels returns the rule labels. +func (rule *RecordingRule) Labels() labels.Labels { + return rule.labels +} + // Eval evaluates the rule and then overrides the metric names and labels accordingly. func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, _ *url.URL) (promql.Vector, error) { vector, err := query(ctx, rule.vector.String(), ts) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 4d990eb648..f9be522d8a 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -41,6 +41,7 @@ import ( "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" @@ -95,6 +96,11 @@ type alertmanagerRetriever interface { DroppedAlertmanagers() []*url.URL } +type rulesRetriever interface { + RuleGroups() []*rules.Group + AlertingRules() []*rules.AlertingRule +} + type response struct { Status status `json:"status"` Data interface{} `json:"data,omitempty"` @@ -119,11 +125,11 @@ type API struct { targetRetriever targetRetriever alertmanagerRetriever alertmanagerRetriever - - now func() time.Time - config func() config.Config - flagsMap map[string]string - ready func(http.HandlerFunc) http.HandlerFunc + rulesRetriever rulesRetriever + now func() time.Time + config func() config.Config + flagsMap map[string]string + ready 
func(http.HandlerFunc) http.HandlerFunc db func() *tsdb.DB enableAdmin bool @@ -142,18 +148,20 @@ func NewAPI( db func() *tsdb.DB, enableAdmin bool, logger log.Logger, + rr rulesRetriever, ) *API { return &API{ QueryEngine: qe, Queryable: q, targetRetriever: tr, alertmanagerRetriever: ar, - now: time.Now, - config: configFunc, - flagsMap: flagsMap, - ready: readyFunc, - db: db, - enableAdmin: enableAdmin, + now: time.Now, + config: configFunc, + flagsMap: flagsMap, + ready: readyFunc, + db: db, + enableAdmin: enableAdmin, + rulesRetriever: rr, } } @@ -199,6 +207,9 @@ func (api *API) Register(r *route.Router) { r.Get("/status/flags", wrap(api.serveFlags)) r.Post("/read", api.ready(http.HandlerFunc(api.remoteRead))) + r.Get("/alerts", wrap(api.alerts)) + r.Get("/rules", wrap(api.rules)) + // Admin APIs r.Post("/admin/tsdb/delete_series", wrap(api.deleteSeries)) r.Post("/admin/tsdb/clean_tombstones", wrap(api.cleanTombstones)) @@ -578,6 +589,132 @@ func (api *API) alertmanagers(r *http.Request) (interface{}, *apiError, func()) return ams, nil, nil } +// AlertDiscovery has info for all active alerts. +type AlertDiscovery struct { + Alerts []*Alert `json:"alerts"` +} + +// Alert has info for an alert. 
+type Alert struct { + Labels labels.Labels `json:"labels"` + Annotations labels.Labels `json:"annotations"` + State string `json:"state"` + ActiveAt *time.Time `json:"activeAt,omitempty"` + Value float64 `json:"value"` +} + +func (api *API) alerts(r *http.Request) (interface{}, *apiError, func()) { + alertingRules := api.rulesRetriever.AlertingRules() + alerts := []*Alert{} + + for _, alertingRule := range alertingRules { + alerts = append( + alerts, + rulesAlertsToAPIAlerts(alertingRule.ActiveAlerts())..., + ) + } + + res := &AlertDiscovery{Alerts: alerts} + + return res, nil, nil +} + +func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert { + apiAlerts := make([]*Alert, len(rulesAlerts)) + for i, ruleAlert := range rulesAlerts { + apiAlerts[i] = &Alert{ + Labels: ruleAlert.Labels, + Annotations: ruleAlert.Annotations, + State: ruleAlert.State.String(), + ActiveAt: &ruleAlert.ActiveAt, + Value: ruleAlert.Value, + } + } + + return apiAlerts +} + +// RuleDiscovery has info for all rules +type RuleDiscovery struct { + RuleGroups []*RuleGroup `json:"groups"` +} + +// RuleGroup has info for rules which are part of a group +type RuleGroup struct { + Name string `json:"name"` + File string `json:"file"` + // In order to preserve rule ordering, while exposing type (alerting or recording) + // specific properties, both alerting and recording rules are exposed in the + // same array. + Rules []rule `json:"rules"` + Interval float64 `json:"interval"` +} + +type rule interface{} + +type alertingRule struct { + Name string `json:"name"` + Query string `json:"query"` + Duration float64 `json:"duration"` + Labels labels.Labels `json:"labels"` + Annotations labels.Labels `json:"annotations"` + Alerts []*Alert `json:"alerts"` + // Type of an alertingRule is always "alerting". 
+ Type string `json:"type"` +} + +type recordingRule struct { + Name string `json:"name"` + Query string `json:"query"` + Labels labels.Labels `json:"labels,omitempty"` + // Type of a recordingRule is always "recording". + Type string `json:"type"` +} + +func (api *API) rules(r *http.Request) (interface{}, *apiError, func()) { + ruleGroups := api.rulesRetriever.RuleGroups() + res := &RuleDiscovery{RuleGroups: make([]*RuleGroup, len(ruleGroups))} + for i, grp := range ruleGroups { + apiRuleGroup := &RuleGroup{ + Name: grp.Name(), + File: grp.File(), + Interval: grp.Interval().Seconds(), + Rules: []rule{}, + } + + for _, grpRule := range grp.Rules() { + var enrichedRule rule + + switch typed := grpRule.(type) { + case *rules.AlertingRule: + enrichedRule = alertingRule{ + Name: typed.Name(), + Query: typed.Query().String(), + Duration: typed.Duration().Seconds(), + Labels: typed.Labels(), + Annotations: typed.Annotations(), + Alerts: rulesAlertsToAPIAlerts(typed.ActiveAlerts()), + Type: "alerting", + } + case *rules.RecordingRule: + enrichedRule = recordingRule{ + Name: typed.Name(), + Query: typed.Query().String(), + Labels: typed.Labels(), + Type: "recording", + } + default: + err := fmt.Errorf("failed to assert type of rule '%v'", typed.Name()) + return nil, &apiError{errorInternal, err}, nil + } + + apiRuleGroup.Rules = append(apiRuleGroup.Rules, enrichedRule) + } + res.RuleGroups[i] = apiRuleGroup + } + return res, nil, nil +} + type prometheusConfig struct { YAML string `json:"yaml"` } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index a4807daf29..e0d0f23c50 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -19,6 +19,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/go-kit/kit/log" "io/ioutil" "math" "net/http" @@ -41,9 +42,11 @@ import ( "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/rules" 
"github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" + "github.com/prometheus/prometheus/util/testutil" ) type testTargetRetriever struct{} @@ -98,6 +101,73 @@ func (t testAlertmanagerRetriever) DroppedAlertmanagers() []*url.URL { } } +type rulesRetrieverMock struct { + testing *testing.T +} + +func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { + expr1, err := promql.ParseExpr(`absent(test_metric3) != 1`) + if err != nil { + m.testing.Fatalf("unable to parse alert expression: %s", err) + } + expr2, err := promql.ParseExpr(`up == 1`) + if err != nil { + m.testing.Fatalf("Unable to parse alert expression: %s", err) + } + + rule1 := rules.NewAlertingRule( + "test_metric3", + expr1, + time.Second, + labels.Labels{}, + labels.Labels{}, + log.NewNopLogger(), + ) + rule2 := rules.NewAlertingRule( + "test_metric4", + expr2, + time.Second, + labels.Labels{}, + labels.Labels{}, + log.NewNopLogger(), + ) + var r []*rules.AlertingRule + r = append(r, rule1) + r = append(r, rule2) + return r +} + +func (m rulesRetrieverMock) RuleGroups() []*rules.Group { + // Use m (not a zero-value mock) so parse failures reach the real *testing.T. + arules := m.AlertingRules() + storage := testutil.NewStorage(m.testing) + defer storage.Close() + + engine := promql.NewEngine(nil, nil, 10, 10*time.Second) + opts := &rules.ManagerOptions{ + QueryFunc: rules.EngineQueryFunc(engine, storage), + Appendable: storage, + Context: context.Background(), + Logger: log.NewNopLogger(), + } + + var r []rules.Rule + + for _, alertrule := range arules { + r = append(r, alertrule) + } + + recordingExpr, err := promql.ParseExpr(`vector(1)`) + if err != nil { + m.testing.Fatalf("unable to parse recording rule expression: %s", err) + } + recordingRule := rules.NewRecordingRule("recording-rule-1", recordingExpr, labels.Labels{}) + r = append(r, recordingRule) + + group := rules.NewGroup("grp", "/path/to/file", time.Second, r, opts) + return []*rules.Group{group} +} + var 
samplePrometheusCfg = config.Config{ GlobalConfig: config.GlobalConfig{}, AlertingConfig: config.AlertingConfig{}, @@ -130,16 +200,29 @@ func TestEndpoints(t *testing.T) { now := time.Now() + var algr rulesRetrieverMock + algr.testing = t + algr.AlertingRules() + algr.RuleGroups() + t.Run("local", func(t *testing.T) { + var algr rulesRetrieverMock + algr.testing = t + + algr.AlertingRules() + + algr.RuleGroups() + api := &API{ Queryable: suite.Storage(), QueryEngine: suite.QueryEngine(), targetRetriever: testTargetRetriever{}, alertmanagerRetriever: testAlertmanagerRetriever{}, - now: func() time.Time { return now }, - config: func() config.Config { return samplePrometheusCfg }, - flagsMap: sampleFlagMap, - ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, + now: func() time.Time { return now }, + config: func() config.Config { return samplePrometheusCfg }, + flagsMap: sampleFlagMap, + ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, + rulesRetriever: algr, } testEndpoints(t, api, true) @@ -176,15 +259,23 @@ func TestEndpoints(t *testing.T) { t.Fatal(err) } + var algr rulesRetrieverMock + algr.testing = t + + algr.AlertingRules() + + algr.RuleGroups() + api := &API{ Queryable: remote, QueryEngine: suite.QueryEngine(), targetRetriever: testTargetRetriever{}, alertmanagerRetriever: testAlertmanagerRetriever{}, - now: func() time.Time { return now }, - config: func() config.Config { return samplePrometheusCfg }, - flagsMap: sampleFlagMap, - ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, + now: func() time.Time { return now }, + config: func() config.Config { return samplePrometheusCfg }, + flagsMap: sampleFlagMap, + ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, + rulesRetriever: algr, } testEndpoints(t, api, false) @@ -237,7 +328,6 @@ func setupRemote(s storage.Storage) *httptest.Server { } func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { - start := time.Unix(0, 0) type test struct { @@ -567,6 
+657,50 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { endpoint: api.serveFlags, response: sampleFlagMap, }, + { + endpoint: api.alerts, + response: &AlertDiscovery{ + Alerts: []*Alert{}, + }, + }, + { + endpoint: api.rules, + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ + { + Name: "grp", + File: "/path/to/file", + Interval: 1, + Rules: []rule{ + alertingRule{ + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Type: "alerting", + }, + alertingRule{ + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Type: "alerting", + }, + recordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Type: "recording", + }, + }, + }, + }, + }, + }, } if testLabelAPI { @@ -646,7 +780,21 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { t.Fatalf("Expected error of type %q but got none", test.errType) } if !reflect.DeepEqual(resp, test.response) { - t.Fatalf("Response does not match, expected:\n%+v\ngot:\n%+v", test.response, resp) + respJSON, err := json.Marshal(resp) + if err != nil { + t.Fatalf("failed to marshal response as JSON: %v", err.Error()) + } + + expectedRespJSON, err := json.Marshal(test.response) + if err != nil { + t.Fatalf("failed to marshal expected response as JSON: %v", err.Error()) + } + + t.Fatalf( + "Response does not match, expected:\n%+v\ngot:\n%+v", + string(expectedRespJSON), + string(respJSON), + ) } } } diff --git a/web/web.go b/web/web.go index af75a08278..5fa6309ab7 100644 --- a/web/web.go +++ b/web/web.go @@ -228,6 +228,7 @@ func New(logger log.Logger, o *Options) *Handler { h.options.TSDB, h.options.EnableAdminAPI, logger, + h.ruleManager, ) if o.RoutePrefix != "/" {