mirror of
https://github.com/prometheus/prometheus.git
synced 2026-05-05 20:36:13 +02:00
Merge pull request #18244 from Ketan-Goyal/main
Alerting: Handling the Case of Increased holdDuration in alert rules
This commit is contained in:
commit
efbdc3f886
@ -523,6 +523,14 @@ func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts t
|
||||
a.FiredAt = ts
|
||||
}
|
||||
|
||||
// If the alert is firing and the active time is less than the new hold duration, set the state to pending.
|
||||
if a.State == StateFiring && ts.Sub(a.ActiveAt) < r.holdDuration {
|
||||
a.State = StatePending
|
||||
a.FiredAt = time.Time{}
|
||||
a.LastSentAt = time.Time{}
|
||||
a.KeepFiringSince = time.Time{}
|
||||
}
|
||||
|
||||
if r.restored.Load() {
|
||||
vec = append(vec, r.sample(a, ts.Add(-queryOffset)))
|
||||
vec = append(vec, r.forStateSample(a, ts.Add(-queryOffset), float64(a.ActiveAt.Unix())))
|
||||
|
||||
@ -1063,3 +1063,85 @@ func TestAlertingRule_ActiveAlertsCount(t *testing.T) {
|
||||
|
||||
require.Equal(t, 1, rule.ActiveAlertsCount())
|
||||
}
|
||||
|
||||
// TestFiringAlertResetToPendingOnHoldDurationIncrease verifies that when the
|
||||
// holdDuration ("for" duration) is increased on a rule that already has a
|
||||
// firing alert, the alert is demoted back to StatePending because the elapsed
|
||||
// time since ActiveAt no longer meets the new, larger holdDuration.
|
||||
func TestFiringAlertResetToPendingOnHoldDurationIncrease(t *testing.T) {
|
||||
shortHold := 15 * time.Second
|
||||
longHold := 1 * time.Hour
|
||||
|
||||
expr, err := testParser.ParseExpr("foo")
|
||||
require.NoError(t, err)
|
||||
|
||||
rule := NewAlertingRule(
|
||||
"TestResetToPending",
|
||||
expr,
|
||||
shortHold,
|
||||
0,
|
||||
labels.EmptyLabels(),
|
||||
labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
|
||||
)
|
||||
|
||||
baseTime := time.Unix(0, 0)
|
||||
q := func(_ context.Context, _ string, ts time.Time) (promql.Vector, error) {
|
||||
return promql.Vector{
|
||||
promql.Sample{
|
||||
Metric: labels.EmptyLabels(),
|
||||
T: timestamp.FromTime(ts),
|
||||
F: 1,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Eval at t=0: creates the alert in StatePending with ActiveAt = baseTime.
|
||||
_, err = rule.Eval(context.TODO(), 0, baseTime, q, nil, 0)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Len(t, rule.active, 1)
|
||||
var alert *Alert
|
||||
for _, a := range rule.active {
|
||||
alert = a
|
||||
}
|
||||
require.Equal(t, StatePending, alert.State)
|
||||
|
||||
// Eval at t=15s: the short holdDuration is met, alert transitions to firing.
|
||||
evalTime := baseTime.Add(shortHold)
|
||||
_, err = rule.Eval(context.TODO(), 0, evalTime, q, nil, 0)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, StateFiring, alert.State)
|
||||
require.False(t, alert.FiredAt.IsZero(), "FiredAt should be set after firing")
|
||||
|
||||
// Now increase the holdDuration to 1 hour (simulating a rule config reload).
|
||||
rule.holdDuration = longHold
|
||||
|
||||
// Eval at t=30s: only 30s have elapsed since ActiveAt, which is far less
|
||||
// than the new 1h holdDuration, so the alert must go back to pending.
|
||||
evalTime = baseTime.Add(30 * time.Second)
|
||||
res, err := rule.Eval(context.TODO(), 0, evalTime, q, nil, 0)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, StatePending, alert.State)
|
||||
require.True(t, alert.FiredAt.IsZero(), "FiredAt should be reset")
|
||||
require.True(t, alert.LastSentAt.IsZero(), "LastSentAt should be reset")
|
||||
require.True(t, alert.KeepFiringSince.IsZero(), "KeepFiringSince should be reset")
|
||||
|
||||
for _, smpl := range res {
|
||||
if smpl.Metric.Get("__name__") == "ALERTS" {
|
||||
require.Equal(t, "pending", smpl.Metric.Get("alertstate"))
|
||||
}
|
||||
}
|
||||
|
||||
// Eval at t=1h: now the new holdDuration is met, alert fires again.
|
||||
evalTime = baseTime.Add(longHold)
|
||||
res, err = rule.Eval(context.TODO(), 0, evalTime, q, nil, 0)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, StateFiring, alert.State)
|
||||
|
||||
for _, smpl := range res {
|
||||
if smpl.Metric.Get("__name__") == "ALERTS" {
|
||||
require.Equal(t, "firing", smpl.Metric.Get("alertstate"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user