mirror of https://github.com/prometheus/prometheus.git
promql: histogram_fraction for bucket histograms (#16095)
* promql: histogram_fraction for bucket histograms

  This PR extends the histogram_fraction function to also work with classic
  bucket histograms. This is beneficial because it allows expressions like
  sum(increase(my_bucket{le="0.5"}[10m]))/sum(increase(my_total[10m])) to be
  written without knowing the actual values of the "le" label, easing the
  transition to native histograms later on. It also feels natural since
  histogram_quantile also can deal with classic histograms.

  Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>

* promql: histogram_fraction for bucket histograms

* Add documentation and reduce code duplication

* Fix a bug in linear interpolation between bucket boundaries

* Add more PromQL tests

  Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>

* Update docs/querying/functions.md

  Co-authored-by: Björn Rabenstein <github@rabenste.in>
  Signed-off-by: Michael Hoffmann <mhoffm@posteo.de>

---------

Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>
Signed-off-by: Michael Hoffmann <mhoffm@posteo.de>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
Parent: d9c0ad1e61
Commit: d6d9f97bac
@@ -253,10 +253,23 @@ histogram samples:

 ## `histogram_fraction()`

-`histogram_fraction(lower scalar, upper scalar, v instant-vector)` returns the
+`histogram_fraction(lower scalar, upper scalar, b instant-vector)` returns the
 estimated fraction of observations between the provided lower and upper values
-for each histogram sample in `v`. Float samples are ignored and do not show up
-in the returned vector.
+for each classic or native histogram contained in `b`. Float samples in `b` are
+considered the counts of observations in each bucket of one or more classic
+histograms, while native histogram samples in `b` are treated each individually
+as a separate histogram. This works in the same way as for `histogram_quantile()`.
+(See there for more details.)
+
+If the provided lower and upper values do not coincide with bucket boundaries,
+the calculated fraction is an estimate, using the same interpolation method as for
+`histogram_quantile()`. (See there for more details.) Especially with classic
+histograms, it is easy to accidentally pick lower or upper values that are very
+far away from any bucket boundary, leading to large margins of error. Rather than
+using `histogram_fraction()` with classic histograms, it is often a more robust approach
+to directly act on the bucket series when calculating fractions. See the
+[calculation of the Apdex score](https://prometheus.io/docs/practices/histograms/#apdex-score)
+as a typical example.

 For example, the following expression calculates the fraction of HTTP requests
 over the last hour that took 200ms or less:
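The interpolation mentioned above can be summarized as follows. This is a sketch derived from the `BucketFraction` helper added later in this commit, not part of the documentation text: if a boundary value $v$ falls into a classic bucket $(l, u]$ with cumulative observation counts $r_l$ at $l$ and $r_u$ at $u$, its rank is estimated linearly, and the fraction between `lower` $=a$ and `upper` $=b$ is the difference of the two ranks divided by the total count $N$:

$$
\operatorname{rank}(v) \approx r_l + (r_u - r_l)\,\frac{v - l}{u - l},
\qquad
\mathrm{fraction}(a, b) \approx \frac{\operatorname{rank}(b) - \operatorname{rank}(a)}{N}.
$$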
@@ -280,8 +293,8 @@ feature inclusive upper boundaries and exclusive lower boundaries for positive
 values, and vice versa for negative values.) Without a precise alignment of
 boundaries, the function uses interpolation to estimate the fraction. With the
 resulting uncertainty, it becomes irrelevant if the boundaries are inclusive or
-exclusive. The interpolation method is the same as the one used for
-`histogram_quantile()`. See there for more details.
+exclusive.

 ## `histogram_quantile()`

@@ -1137,8 +1137,9 @@ type EvalNodeHelper struct {
 	Out Vector

 	// Caches.
-	// funcHistogramQuantile for classic histograms.
+	// funcHistogramQuantile and funcHistogramFraction for classic histograms.
 	signatureToMetricWithBuckets map[string]*metricWithBuckets
+	nativeHistogramSamples       []Sample

 	lb     *labels.Builder
 	lblBuf []byte
@@ -1161,6 +1162,62 @@ func (enh *EvalNodeHelper) resetBuilder(lbls labels.Labels) {
 	}
 }

+// resetHistograms prepares the histogram caches by splitting the given vector into native and classic histograms.
+func (enh *EvalNodeHelper) resetHistograms(inVec Vector, arg parser.Expr) annotations.Annotations {
+	var annos annotations.Annotations
+
+	if enh.signatureToMetricWithBuckets == nil {
+		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
+	} else {
+		for _, v := range enh.signatureToMetricWithBuckets {
+			v.buckets = v.buckets[:0]
+		}
+	}
+	enh.nativeHistogramSamples = enh.nativeHistogramSamples[:0]
+
+	for _, sample := range inVec {
+		// We are only looking for classic buckets here. Remember
+		// the histograms for later treatment.
+		if sample.H != nil {
+			enh.nativeHistogramSamples = append(enh.nativeHistogramSamples, sample)
+			continue
+		}
+
+		upperBound, err := strconv.ParseFloat(
+			sample.Metric.Get(model.BucketLabel), 64,
+		)
+		if err != nil {
+			annos.Add(annotations.NewBadBucketLabelWarning(sample.Metric.Get(labels.MetricName), sample.Metric.Get(model.BucketLabel), arg.PositionRange()))
+			continue
+		}
+		enh.lblBuf = sample.Metric.BytesWithoutLabels(enh.lblBuf, labels.BucketLabel)
+		mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]
+		if !ok {
+			sample.Metric = labels.NewBuilder(sample.Metric).
+				Del(excludedLabels...).
+				Labels()
+			mb = &metricWithBuckets{sample.Metric, nil}
+			enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb
+		}
+		mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F})
+	}
+
+	for _, sample := range enh.nativeHistogramSamples {
+		// We have to reconstruct the exact same signature as above for
+		// a classic histogram, just ignoring any le label.
+		enh.lblBuf = sample.Metric.Bytes(enh.lblBuf)
+		if mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]; ok && len(mb.buckets) > 0 {
+			// At this data point, we have classic histogram
+			// buckets and a native histogram with the same name and
+			// labels. Do not evaluate anything.
+			annos.Add(annotations.NewMixedClassicNativeHistogramsWarning(sample.Metric.Get(labels.MetricName), arg.PositionRange()))
+			delete(enh.signatureToMetricWithBuckets, string(enh.lblBuf))
+			continue
+		}
+	}
+	return annos
+}
+
 // rangeEval evaluates the given expressions, and then for each step calls
 // the given funcCall with the values computed for each expression at that
 // step. The return value is the combination into time series of all the
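For intuition about the signature matching in the two loops above, here is a minimal sketch with invented series names and labels. A native histogram sample carries no "le" label, so hashing all of its labels with `Bytes` yields the same byte signature that the classic-bucket branch computes with `BytesWithoutLabels(..., labels.BucketLabel)`, which is what lets the conflicting classic/native case be detected and warned about.

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
)

func main() {
	// Hypothetical series: the same metric name and labels exposed both as a
	// classic bucket series (with an "le" label) and as a native histogram.
	classicBucket := labels.FromStrings("__name__", "http_request_duration_seconds", "job", "api", "le", "0.5")
	native := labels.FromStrings("__name__", "http_request_duration_seconds", "job", "api")

	var buf []byte
	// Signature used for classic buckets: all labels except "le".
	sigClassic := string(classicBucket.BytesWithoutLabels(buf, labels.BucketLabel))
	// Signature used for native histogram samples: all labels (there is no "le").
	sigNative := string(native.Bytes(buf))

	// The signatures collide, which is the situation that triggers the
	// mixed classic/native histogram warning in resetHistograms.
	fmt.Println(sigClassic == sigNative)
}
```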
@@ -20,7 +20,6 @@ import (
 	"math"
 	"slices"
 	"sort"
-	"strconv"
 	"strings"
 	"time"

@@ -1390,16 +1389,15 @@ func funcHistogramStdVar(vals []parser.Value, _ parser.Expressions, enh *EvalNod
 }

 // === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) (Vector, Annotations) ===
-func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
+func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	lower := vals[0].(Vector)[0].F
 	upper := vals[1].(Vector)[0].F
 	inVec := vals[2].(Vector)

-	for _, sample := range inVec {
-		// Skip non-histogram samples.
-		if sample.H == nil {
-			continue
-		}
+	annos := enh.resetHistograms(inVec, args[2])
+
+	// Deal with the native histograms.
+	for _, sample := range enh.nativeHistogramSamples {
 		if !enh.enableDelayedNameRemoval {
 			sample.Metric = sample.Metric.DropMetricName()
 		}
@@ -1409,7 +1407,24 @@ func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalN
 			DropName: true,
 		})
 	}
-	return enh.Out, nil
+
+	// Deal with classic histograms that have already been filtered for conflicting native histograms.
+	for _, mb := range enh.signatureToMetricWithBuckets {
+		if len(mb.buckets) == 0 {
+			continue
+		}
+		if !enh.enableDelayedNameRemoval {
+			mb.metric = mb.metric.DropMetricName()
+		}
+
+		enh.Out = append(enh.Out, Sample{
+			Metric:   mb.metric,
+			F:        BucketFraction(lower, upper, mb.buckets),
+			DropName: true,
+		})
+	}
+
+	return enh.Out, annos
 }

 // === histogram_quantile(k parser.ValueTypeScalar, Vector parser.ValueTypeVector) (Vector, Annotations) ===
@@ -1421,58 +1436,10 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 	if math.IsNaN(q) || q < 0 || q > 1 {
 		annos.Add(annotations.NewInvalidQuantileWarning(q, args[0].PositionRange()))
 	}
+	annos.Merge(enh.resetHistograms(inVec, args[1]))

-	if enh.signatureToMetricWithBuckets == nil {
-		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
-	} else {
-		for _, v := range enh.signatureToMetricWithBuckets {
-			v.buckets = v.buckets[:0]
-		}
-	}
-
-	var histogramSamples []Sample
-
-	for _, sample := range inVec {
-		// We are only looking for classic buckets here. Remember
-		// the histograms for later treatment.
-		if sample.H != nil {
-			histogramSamples = append(histogramSamples, sample)
-			continue
-		}
-
-		upperBound, err := strconv.ParseFloat(
-			sample.Metric.Get(model.BucketLabel), 64,
-		)
-		if err != nil {
-			annos.Add(annotations.NewBadBucketLabelWarning(sample.Metric.Get(labels.MetricName), sample.Metric.Get(model.BucketLabel), args[1].PositionRange()))
-			continue
-		}
-		enh.lblBuf = sample.Metric.BytesWithoutLabels(enh.lblBuf, labels.BucketLabel)
-		mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]
-		if !ok {
-			sample.Metric = labels.NewBuilder(sample.Metric).
-				Del(excludedLabels...).
-				Labels()
-			mb = &metricWithBuckets{sample.Metric, nil}
-			enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb
-		}
-		mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F})
-	}
-
-	// Now deal with the native histograms.
-	for _, sample := range histogramSamples {
-		// We have to reconstruct the exact same signature as above for
-		// a classic histogram, just ignoring any le label.
-		enh.lblBuf = sample.Metric.Bytes(enh.lblBuf)
-		if mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]; ok && len(mb.buckets) > 0 {
-			// At this data point, we have classic histogram
-			// buckets and a native histogram with the same name and
-			// labels. Do not evaluate anything.
-			annos.Add(annotations.NewMixedClassicNativeHistogramsWarning(sample.Metric.Get(labels.MetricName), args[1].PositionRange()))
-			delete(enh.signatureToMetricWithBuckets, string(enh.lblBuf))
-			continue
-		}
-
+	// Deal with the native histograms.
+	for _, sample := range enh.nativeHistogramSamples {
 		if !enh.enableDelayedNameRemoval {
 			sample.Metric = sample.Metric.DropMetricName()
 		}
@@ -1483,7 +1450,7 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 		})
 	}

-	// Now do classic histograms that have already been filtered for conflicting native histograms.
+	// Deal with classic histograms that have already been filtered for conflicting native histograms.
 	for _, mb := range enh.signatureToMetricWithBuckets {
 		if len(mb.buckets) > 0 {
 			res, forcedMonotonicity, _ := BucketQuantile(q, mb.buckets)
promql/promqltest/testdata/histograms.test: 28 changes
@@ -104,15 +104,43 @@ eval instant at 50m histogram_stdvar(testhistogram3)
   {start="negative"} 17.495112615949154

 # Test histogram_fraction.
+#
+eval instant at 50m histogram_fraction(0, 4, testhistogram2)
+  {} 0.6666666666666666
+
+eval instant at 50m histogram_fraction(0, 4, testhistogram2_bucket)
+  {} 0.6666666666666666
+
+eval instant at 50m histogram_fraction(0, 6, testhistogram2)
+  {} 1
+
+eval instant at 50m histogram_fraction(0, 6, testhistogram2_bucket)
+  {} 1
+
+eval instant at 50m histogram_fraction(0, 3.5, testhistogram2)
+  {} 0.5833333333333334
+
+eval instant at 50m histogram_fraction(0, 3.5, testhistogram2_bucket)
+  {} 0.5833333333333334
+
+

 eval instant at 50m histogram_fraction(0, 0.2, testhistogram3)
   {start="positive"} 0.6363636363636364
   {start="negative"} 0

+eval instant at 50m histogram_fraction(0, 0.2, testhistogram3_bucket)
+  {start="positive"} 0.6363636363636364
+  {start="negative"} 0
+
 eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[10m]))
   {start="positive"} 0.6363636363636364
   {start="negative"} 0

+eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3_bucket[10m]))
+  {start="positive"} 0.6363636363636364
+  {start="negative"} 0
+
 # In the classic histogram, we can access the corresponding bucket (if
 # it exists) and divide by the count to get the same result.
@@ -448,6 +448,84 @@ func HistogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
 	return (upperRank - lowerRank) / h.Count
 }

+// BucketFraction is a version of HistogramFraction for classic histograms.
+func BucketFraction(lower, upper float64, buckets Buckets) float64 {
+	slices.SortFunc(buckets, func(a, b Bucket) int {
+		// We don't expect the bucket boundary to be a NaN.
+		if a.UpperBound < b.UpperBound {
+			return -1
+		}
+		if a.UpperBound > b.UpperBound {
+			return +1
+		}
+		return 0
+	})
+	if !math.IsInf(buckets[len(buckets)-1].UpperBound, +1) {
+		return math.NaN()
+	}
+	buckets = coalesceBuckets(buckets)
+
+	count := buckets[len(buckets)-1].Count
+	if count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
+		return math.NaN()
+	}
+	if lower >= upper {
+		return 0
+	}
+
+	var (
+		rank, lowerRank, upperRank float64
+		lowerSet, upperSet         bool
+	)
+	for i, b := range buckets {
+		lowerBound := math.Inf(-1)
+		if i > 0 {
+			lowerBound = buckets[i-1].UpperBound
+		}
+		upperBound := b.UpperBound
+
+		interpolateLinearly := func(v float64) float64 {
+			return rank + (b.Count-rank)*(v-lowerBound)/(upperBound-lowerBound)
+		}
+
+		if !lowerSet && lowerBound >= lower {
+			// We have hit the lower value at the lower bucket boundary.
+			lowerRank = rank
+			lowerSet = true
+		}
+		if !upperSet && lowerBound >= upper {
+			// We have hit the upper value at the lower bucket boundary.
+			upperRank = rank
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		if !lowerSet && lowerBound < lower && upperBound > lower {
+			// The lower value is in this bucket.
+			lowerRank = interpolateLinearly(lower)
+			lowerSet = true
+		}
+		if !upperSet && lowerBound < upper && upperBound > upper {
+			// The upper value is in this bucket.
+			upperRank = interpolateLinearly(upper)
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		rank = b.Count
+	}
+	if !lowerSet || lowerRank > count {
+		lowerRank = count
+	}
+	if !upperSet || upperRank > count {
+		upperRank = count
+	}
+
+	return (upperRank - lowerRank) / count
+}
+
 // coalesceBuckets merges buckets with the same upper bound.
 //
 // The input buckets must be sorted.
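As a quick illustration of how BucketFraction behaves, here is a minimal sketch that assumes a build of the promql package containing this commit; the bucket values are invented. As the code above shows, the function returns NaN when the last bucket is not the +Inf bucket, when the total count is zero, or when either boundary is NaN.

```go
package main

import (
	"fmt"
	"math"

	"github.com/prometheus/prometheus/promql"
)

func main() {
	// Invented classic-histogram buckets (cumulative counts):
	//   le="1"    -> 10 observations
	//   le="2"    -> 30 observations
	//   le="+Inf" -> 40 observations (total count)
	buckets := promql.Buckets{
		{UpperBound: 1, Count: 10},
		{UpperBound: 2, Count: 30},
		{UpperBound: math.Inf(+1), Count: 40},
	}

	// Fraction of observations in (1, 1.5]: the upper value 1.5 falls inside
	// the (1, 2] bucket, so its rank is interpolated linearly:
	//   10 + (30-10)*(1.5-1)/(2-1) = 20
	// giving (20 - 10) / 40 = 0.25.
	fmt.Println(promql.BucketFraction(1, 1.5, buckets))
}
```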