promql: Prevent extrapolation below zero for histogram count

This deals with the count field of native histograms in the same way
as with simple float counters. It then scales the whole histogram with
the same factor as it has scaled the count. This will still allow
individual buckets to get extrapolated below zero, but maybe that is
fine.

This implements approach (2) as described in
https://github.com/prometheus/prometheus/issues/15976#issuecomment-3032095158

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2025-07-03 16:47:37 +02:00
parent c565e95808
commit bcf7a822a0
2 changed files with 27 additions and 14 deletions

View File

@ -144,32 +144,37 @@ func extrapolatedRate(vals []parser.Value, args parser.Expressions, enh *EvalNod
// (which is our guess for where the series actually starts or ends).
extrapolationThreshold := averageDurationBetweenSamples * 1.1
extrapolateToInterval := sampledInterval
if durationToStart >= extrapolationThreshold {
durationToStart = averageDurationBetweenSamples / 2
}
if isCounter && resultFloat > 0 && len(samples.Floats) > 0 && samples.Floats[0].F >= 0 {
if isCounter {
// Counters cannot be negative. If we have any slope at all
// (i.e. resultFloat went up), we can extrapolate the zero point
// of the counter. If the duration to the zero point is shorter
// than the durationToStart, we take the zero point as the start
// of the series, thereby avoiding extrapolation to negative
// counter values.
// TODO(beorn7): Do this for histograms, too.
durationToZero := sampledInterval * (samples.Floats[0].F / resultFloat)
durationToZero := durationToStart
if resultFloat > 0 &&
len(samples.Floats) > 0 &&
samples.Floats[0].F >= 0 {
durationToZero = sampledInterval * (samples.Floats[0].F / resultFloat)
} else if resultHistogram != nil &&
resultHistogram.Count > 0 &&
len(samples.Histograms) > 0 &&
samples.Histograms[0].H.Count >= 0 {
durationToZero = sampledInterval * (samples.Histograms[0].H.Count / resultHistogram.Count)
}
if durationToZero < durationToStart {
durationToStart = durationToZero
}
}
extrapolateToInterval += durationToStart
if durationToEnd >= extrapolationThreshold {
durationToEnd = averageDurationBetweenSamples / 2
}
extrapolateToInterval += durationToEnd
factor := extrapolateToInterval / sampledInterval
factor := (sampledInterval + durationToStart + durationToEnd) / sampledInterval
if isRate {
factor /= ms.Range.Seconds()
}

View File

@ -1041,11 +1041,15 @@ eval_warn instant at 1m rate(some_metric[1m30s])
eval_warn instant at 1m30s rate(some_metric[1m30s])
# Should produce no results.
# Start with custom, end with exponential. Return the exponential histogram divided by 30.
# Start with custom, end with exponential. Return the exponential histogram divided by 48.
# (The 1st sample is the NHCB with count:1. It is mostly ignored with the exception of the
# count, which means the rate calculation extrapolates until the count hits 0.)
eval instant at 1m rate(some_metric[1m])
{} {{schema:0 sum:0.16666666666666666 count:0.13333333333333333 buckets:[0.03333333333333333 0.06666666666666667 0.03333333333333333]}}
{} {{count:0.08333333333333333 sum:0.10416666666666666 counter_reset_hint:gauge buckets:[0.020833333333333332 0.041666666666666664 0.020833333333333332]}}
# Start with exponential, end with custom. Return the custom buckets histogram divided by 30.
# (With the 2nd sample having a count of 1, the extrapolation to zero lands exactly at the
# left boundary of the range, so no extrapolation limitation is needed.)
eval instant at 30s rate(some_metric[1m])
{} {{schema:-53 sum:0.03333333333333333 count:0.03333333333333333 custom_values:[5 10] buckets:[0.03333333333333333]}}
@ -1376,21 +1380,25 @@ eval instant at 1m histogram_fraction(-Inf, +Inf, histogram_nan)
clear
# Tests to demonstrate how an extrapolation below zero is prevented for a float counter, but not for native histograms.
# I.e. the float counter that behaves the same as the histogram count might yield a different result after `increase`.
# Tests to demonstrate how an extrapolation below zero is prevented for both float counters and native counter histograms.
# Note that the float counter behaves the same as the histogram count after `increase`.
load 1m
metric{type="histogram"} {{schema:0 count:15 sum:25 buckets:[5 10]}} {{schema:0 count:2490 sum:75 buckets:[15 2475]}}x55
metric{type="counter"} 15 2490x55
# End of range coincides with sample. Zero point of count is reached within the range.
# Note that the 2nd bucket has an exaggerated increase of 2479.939393939394 (although
# it has a value of only 2475 at the end of the range).
eval instant at 55m increase(metric[90m])
{type="histogram"} {{count:2497.5 sum:50.45454545454545 counter_reset_hint:gauge buckets:[10.09090909090909 2487.409090909091]}}
{type="histogram"} {{count:2490 sum:50.303030303030305 counter_reset_hint:gauge buckets:[10.06060606060606 2479.939393939394]}}
{type="counter"} 2490
# End of range does not coincide with sample. Zero point of count is reached within the range.
# The 2nd bucket again has an exaggerated increase, but it is less obvious because of the
# right-side extrapolation.
eval instant at 54m30s increase(metric[90m])
{type="histogram"} {{count:2520.8333333333335 sum:50.92592592592593 counter_reset_hint:gauge buckets:[10.185185185185187 2510.6481481481483]}}
{type="histogram"} {{count:2512.9166666666665 sum:50.76599326599326 counter_reset_hint:gauge buckets:[10.153198653198652 2502.7634680134674]}}
{type="counter"} 2512.9166666666665
# End of range coincides with sample. Zero point of count is reached outside of (i.e. before) the range.