fix(promql): histogram_fraction first bucket heuristic consistent with histogram_quantile

For classic histograms and NHCB (custom buckets) where the first bucket
has a non-positive upper boundary (-Inf, x] with x<=0, both
BucketQuantile and HistogramQuantile place all observations at the upper
boundary x. BucketFraction and HistogramFraction were inconsistent: they
treated observations as being at -Inf (excluding them from every fraction
query whose lower bound fell inside the bucket, and counting all of them
whenever the upper bound fell inside it).

Fix BucketFraction and HistogramFraction to use the same heuristic as
their quantile counterparts: observations in the first bucket with a
non-positive upper boundary are assumed to be at x (the upper boundary).
Observations in the first NHCB bucket with a positive upper boundary
retain the existing behaviour of assuming a lower boundary of 0.

Add missing test cases for histogram_fraction on NHCB histograms with
non-positive first bucket boundaries, and add cross-checks between
histogram_quantile and histogram_fraction for exponential native
histograms.

Coded with Claude Sonnet 4.6.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
This commit is contained in:
György Krajcsovits 2026-04-07 16:16:44 +02:00
parent 477bbb6b2c
commit 64f4adf5d9
No known key found for this signature in database
GPG Key ID: 47A8F9CE80FD7C7F
3 changed files with 94 additions and 22 deletions

View File

@ -184,17 +184,39 @@ load_with_nhcb 5m
negative_buckets_lower_falls_in_the_first_bucket_bucket{le="-1"} 15+0x10
negative_buckets_lower_falls_in_the_first_bucket_bucket{le="+Inf"} 100+0x10
# - Bucket [-Inf, -3]: contributes zero observations (no interpolation with infinite width bucket).
# - Bucket [-Inf, -3]: all 10 observations assumed at upper boundary -3, which is in [-4, -2].
# - Bucket [-3, -2]: contributes 12-10 = 2.0 observations (full bucket).
# Total: 2.0 / 100.0 = 0.02
# Total: 12.0 / 100.0 = 0.12
eval instant at 50m histogram_fraction(-4, -2, negative_buckets_lower_falls_in_the_first_bucket_bucket)
expect no_warn
{} 0.02
{} 0.12
# Check consistency with histogram_quantile. Quantile treats observations in the (-Inf, -3] bucket as being at the
# -3 boundary, therefore the 0.10 quantile is -3. If it treated values in that bucket as being at -Inf, the result
# would be -Inf. It follows that histogram_fraction(-4, -3, ...) has to count those 10 observations at -3, not at
# -Inf, otherwise it would return 0.
eval instant at 50m histogram_quantile(0.10, negative_buckets_lower_falls_in_the_first_bucket_bucket)
expect no_warn
{} -3
eval instant at 50m histogram_fraction(-4, -3, negative_buckets_lower_falls_in_the_first_bucket_bucket)
{} 0.10
eval instant at 50m histogram_quantile(0.10, negative_buckets_lower_falls_in_the_first_bucket)
expect no_warn
{} -3
eval instant at 50m histogram_fraction(-4, -3, negative_buckets_lower_falls_in_the_first_bucket)
{} 0.10
# For NHCB, observations in the first bucket [-Inf, -3] are assumed to be at the upper
# boundary -3, which lies in the query range [-4, -2]: 10 observations are included.
# - Bucket [-Inf, -3]: 10 observations assumed at -3 (in range).
# - Bucket [-3, -2]: 12-10 = 2.0 observations (full bucket).
# Total: 12.0 / 100.0 = 0.12
eval instant at 50m histogram_fraction(-4, -2, negative_buckets_lower_falls_in_the_first_bucket)
expect no_warn
{} 0.02
{} 0.12
# Lower is -Inf.
load_with_nhcb 5m

View File

@ -66,6 +66,14 @@ eval instant at 1m histogram_quantiles(single_histogram, "q", 0.5)
expect no_info
{q="0.5"} 1.414213562373095
# Consistency check: histogram_fraction with upper=histogram_quantile(0.5) must return 0.5.
# 1 obs in bucket below 1 is fully counted; 2 obs in (1,2] use exponential interpolation:
# log2(sqrt(2))=0.5, log2(1)=0, log2(2)=1, fraction=(0.5-0)/(1-0)=0.5, contributing 1 obs.
# Total: (1+1)/4 = 0.5.
eval instant at 1m histogram_fraction(0, 1.414213562373095, single_histogram)
expect no_info
{} 0.5
clear
# Repeat the same histogram 10 times.
@ -238,6 +246,13 @@ eval instant at 1m histogram_fraction(-2, -1, negative_histogram)
eval instant at 1m histogram_quantile(0.5, negative_histogram)
{} -1.414213562373095
# Consistency check: histogram_fraction with upper=histogram_quantile(0.5) must return 0.5.
# 1 obs in [-4,-2) is fully ≤ -sqrt(2); 2 obs in [-2,-1) use exponential interpolation:
# log2(sqrt(2))=0.5, log2(2)=1, log2(1)=0, fraction=1-(0.5-0)/(1-0)=0.5, contributing 1 obs.
# Total: (1+1)/4 = 0.5.
eval instant at 1m histogram_fraction(-Inf, -1.414213562373095, negative_histogram)
{} 0.5
clear
# Two histogram samples.
@ -1080,6 +1095,17 @@ load 5m
eval instant at 5m histogram_fraction(5, 10, custom_buckets_histogram)
{} 0.5
# The first bucket (-Inf, 5] has lower boundary -Inf. Since all custom values are
# positive, the lower boundary is assumed to be 0 for interpolation. Linear
# interpolation in (0, 5]: 1 obs, 2.5/5 * 1 = 0.5 obs below 2.5, fraction = 0.5/4 = 0.125.
eval instant at 5m histogram_fraction(-Inf, 2.5, custom_buckets_histogram)
{} 0.125
# Same result: lower=-1 is below the assumed lower bound of 0, so no observations
# are assumed to be below -1 in the first bucket.
eval instant at 5m histogram_fraction(-1, 2.5, custom_buckets_histogram)
{} 0.125
eval instant at 5m histogram_quantile(0.5, custom_buckets_histogram)
{} 7.5
@ -1088,6 +1114,25 @@ eval instant at 5m sum(custom_buckets_histogram)
clear
# Test histogram_fraction for NHCB with non-positive first bucket upper boundary.
# Observations in the first bucket are assumed to be at its upper boundary.
load 1m
nhcb_neg {{schema:-53 sum:-10 count:10 custom_values:[-1] buckets:[10 0]}}
# All observations assumed at -1. upper=-1.5 < -1, so no observations are ≤ -1.5.
eval instant at 1m histogram_fraction(-Inf, -1.5, nhcb_neg)
{} 0
# All observations at -1, which equals the upper bound: fraction = 1.
eval instant at 1m histogram_fraction(-Inf, -1, nhcb_neg)
{} 1
# Observations at -1 fall within [-2, -0.5], so fraction = 1.
eval instant at 1m histogram_fraction(-2, -0.5, nhcb_neg)
{} 1
clear
# Test 'this native histogram metric is not a counter' warning for rate
load 30s
some_metric {{schema:0 sum:1 count:1 buckets:[1] counter_reset_hint:gauge}} {{schema:0 sum:2 count:2 buckets:[2] counter_reset_hint:gauge}} {{schema:0 sum:3 count:3 buckets:[3] counter_reset_hint:gauge}}

View File

@ -415,16 +415,24 @@ func HistogramFraction(lower, upper float64, h *histogram.FloatHistogram, metric
// histograms. It is also used for the zero bucket.
interpolateLinearly := func(v float64) float64 {
// Note: `v` is a finite value.
// For buckets with infinite bounds, we cannot interpolate meaningfully.
// For +Inf upper bound, interpolation returns the cumulative count of the previous bucket
// as the second term in the interpolation formula yields 0 (finite/Inf).
// In other words, no observations from the last bucket are considered in the fraction calculation.
// For -Inf lower bound, however, the second term would be (v-(-Inf))/(upperBound-(-Inf)) = Inf/Inf = NaN.
// To achieve the same effect of no contribution as the +Inf bucket, handle the -Inf case by returning
// the cumulative count at the first bucket (which equals the bucket's count).
// In both cases, we effectively skip interpolation within the infinite-width bucket.
// For NHCB buckets with infinite boundaries, apply the same heuristics
// as HistogramQuantile to keep the two functions consistent inverses of
// each other.
if b.Lower == math.Inf(-1) {
return b.Count
// First NHCB bucket with lower boundary -Inf.
if b.Upper > 0 {
// All custom values are positive: lower boundary assumed to be 0.
if v <= 0 {
return rank
}
return rank + b.Count*v/b.Upper
}
// At least one custom value is zero or negative: all observations
// are assumed to be at the upper boundary.
if v >= b.Upper {
return rank + b.Count
}
return rank
}
return rank + b.Count*(v-b.Lower)/(b.Upper-b.Lower)
}
@ -568,16 +576,13 @@ func BucketFraction(lower, upper float64, buckets Buckets) float64 {
interpolateLinearly := func(v float64) float64 {
// Note: `v` is a finite value.
// For buckets with infinite bounds, we cannot interpolate meaningfully.
// For +Inf upper bound, interpolation returns the cumulative count of the previous bucket
// as the second term in the interpolation formula yields 0 (finite/Inf).
// In other words, no observations from the last bucket are considered in the fraction calculation.
// For -Inf lower bound, however, the second term would be (v-(-Inf))/(upperBound-(-Inf)) = Inf/Inf = NaN.
// To achieve the same effect of no contribution as the +Inf bucket, handle the -Inf case by returning
// the cumulative count at the first bucket.
// In both cases, we effectively skip interpolation within the infinite-width bucket.
// For the first bucket with lower boundary -Inf, apply the same heuristic
// as BucketQuantile: all observations are assumed to be at the upper boundary.
if lowerBound == math.Inf(-1) {
return b.Count
if v >= upperBound {
return b.Count
}
return rank
}
return rank + (b.Count-rank)*(v-lowerBound)/(upperBound-lowerBound)
}