promql: histogram_fraction for bucket histograms (#16095)
* promql: histogram_fraction for bucket histograms

  This PR extends the histogram_fraction function to also work with classic
  bucket histograms. This is beneficial because it allows expressions like
  sum(increase(my_bucket{le="0.5"}[10m])) / sum(increase(my_total[10m]))
  to be written without knowing the actual values of the "le" label, easing
  the transition to native histograms later on. It also feels natural since
  histogram_quantile can also deal with classic histograms.

  Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>

* promql: histogram_fraction for bucket histograms

  * Add documentation and reduce code duplication
  * Fix a bug in linear interpolation between bucket boundaries
  * Add more PromQL tests

  Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>

* Update docs/querying/functions.md

  Co-authored-by: Björn Rabenstein <github@rabenste.in>
  Signed-off-by: Michael Hoffmann <mhoffm@posteo.de>

---------

Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>
Signed-off-by: Michael Hoffmann <mhoffm@posteo.de>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
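For context, a minimal PromQL sketch of the equivalence the commit message describes (the metric names my_bucket/my_total come from the message itself, and 0.5 is assumed to be an actual bucket boundary):

    # Classic form, hard-coding the "le" boundary:
    sum(increase(my_bucket{le="0.5"}[10m])) / sum(increase(my_total[10m]))

    # With this change, roughly the same fraction can be written without
    # referencing "le" values, for classic bucket series as well:
    histogram_fraction(-Inf, 0.5, sum by (le) (increase(my_bucket[10m])))

For a native histogram, the third argument would simply be the summed increase of the native series, with no "le" grouping at all.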
Parent: d9c0ad1e61
Commit: d6d9f97bac
@@ -253,10 +253,23 @@ histogram samples:
 
 ## `histogram_fraction()`
 
-`histogram_fraction(lower scalar, upper scalar, v instant-vector)` returns the
+`histogram_fraction(lower scalar, upper scalar, b instant-vector)` returns the
 estimated fraction of observations between the provided lower and upper values
-for each histogram sample in `v`. Float samples are ignored and do not show up
-in the returned vector.
+for each classic or native histogram contained in `b`. Float samples in `b` are
+considered the counts of observations in each bucket of one or more classic
+histograms, while native histogram samples in `b` are treated each individually
+as a separate histogram. This works in the same way as for `histogram_quantile()`.
+(See there for more details.)
+
+If the provided lower and upper values do not coincide with bucket boundaries,
+the calculated fraction is an estimate, using the same interpolation method as for
+`histogram_quantile()`. (See there for more details.) Especially with classic
+histograms, it is easy to accidentally pick lower or upper values that are very
+far away from any bucket boundary, leading to large margins of error. Rather than
+using `histogram_fraction()` with classic histograms, it is often a more robust approach
+to directly act on the bucket series when calculating fractions. See the
+[calculation of the Apdex score](https://prometheus.io/docs/practices/histograms/#apdex-score)
+as a typical example.
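As a rough sketch of that direct-bucket approach (metric names are illustrative, borrowed from the linked Apdex example, not from this change), the fraction of requests served within 300ms can be computed straight from the classic series, with no interpolation involved:

    sum(rate(http_request_duration_seconds_bucket{le="0.3"}[5m]))
      /
    sum(rate(http_request_duration_seconds_count[5m]))

The trade-off is that only the configured bucket boundaries (here 0.3) can be queried this way.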
 
 For example, the following expression calculates the fraction of HTTP requests
 over the last hour that took 200ms or less:
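The concrete expression is outside this hunk; a plausible sketch, assuming a histogram metric named http_request_duration_seconds (classic buckets or native), would be:

    histogram_fraction(0, 0.2, rate(http_request_duration_seconds[1h]))

or, for the classic flavour, the same call over rate(http_request_duration_seconds_bucket[1h]).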
@@ -280,8 +293,8 @@ feature inclusive upper boundaries and exclusive lower boundaries for positive
 values, and vice versa for negative values.) Without a precise alignment of
 boundaries, the function uses interpolation to estimate the fraction. With the
 resulting uncertainty, it becomes irrelevant if the boundaries are inclusive or
-exclusive. The interpolation method is the same as the one used for
-`histogram_quantile()`. See there for more details.
+exclusive.
 
 
 ## `histogram_quantile()`
 
@@ -1137,8 +1137,9 @@ type EvalNodeHelper struct {
 	Out Vector
 
 	// Caches.
-	// funcHistogramQuantile for classic histograms.
+	// funcHistogramQuantile and funcHistogramFraction for classic histograms.
 	signatureToMetricWithBuckets map[string]*metricWithBuckets
+	nativeHistogramSamples       []Sample
 
 	lb     *labels.Builder
 	lblBuf []byte
@@ -1161,6 +1162,62 @@ func (enh *EvalNodeHelper) resetBuilder(lbls labels.Labels) {
 	}
 }
 
+// resetHistograms prepares the histogram caches by splitting the given vector into native and classic histograms.
+func (enh *EvalNodeHelper) resetHistograms(inVec Vector, arg parser.Expr) annotations.Annotations {
+	var annos annotations.Annotations
+
+	if enh.signatureToMetricWithBuckets == nil {
+		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
+	} else {
+		for _, v := range enh.signatureToMetricWithBuckets {
+			v.buckets = v.buckets[:0]
+		}
+	}
+	enh.nativeHistogramSamples = enh.nativeHistogramSamples[:0]
+
+	for _, sample := range inVec {
+		// We are only looking for classic buckets here. Remember
+		// the histograms for later treatment.
+		if sample.H != nil {
+			enh.nativeHistogramSamples = append(enh.nativeHistogramSamples, sample)
+			continue
+		}
+
+		upperBound, err := strconv.ParseFloat(
+			sample.Metric.Get(model.BucketLabel), 64,
+		)
+		if err != nil {
+			annos.Add(annotations.NewBadBucketLabelWarning(sample.Metric.Get(labels.MetricName), sample.Metric.Get(model.BucketLabel), arg.PositionRange()))
+			continue
+		}
+		enh.lblBuf = sample.Metric.BytesWithoutLabels(enh.lblBuf, labels.BucketLabel)
+		mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]
+		if !ok {
+			sample.Metric = labels.NewBuilder(sample.Metric).
+				Del(excludedLabels...).
+				Labels()
+			mb = &metricWithBuckets{sample.Metric, nil}
+			enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb
+		}
+		mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F})
+	}
+
+	for _, sample := range enh.nativeHistogramSamples {
+		// We have to reconstruct the exact same signature as above for
+		// a classic histogram, just ignoring any le label.
+		enh.lblBuf = sample.Metric.Bytes(enh.lblBuf)
+		if mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]; ok && len(mb.buckets) > 0 {
+			// At this data point, we have classic histogram
+			// buckets and a native histogram with the same name and
+			// labels. Do not evaluate anything.
+			annos.Add(annotations.NewMixedClassicNativeHistogramsWarning(sample.Metric.Get(labels.MetricName), arg.PositionRange()))
+			delete(enh.signatureToMetricWithBuckets, string(enh.lblBuf))
+			continue
+		}
+	}
+	return annos
+}
+
 // rangeEval evaluates the given expressions, and then for each step calls
 // the given funcCall with the values computed for each expression at that
 // step. The return value is the combination into time series of all the
@@ -20,7 +20,6 @@ import (
	"math"
	"slices"
	"sort"
	"strconv"
	"strings"
	"time"
@@ -1390,16 +1389,15 @@ func funcHistogramStdVar(vals []parser.Value, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 }
 
 // === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) (Vector, Annotations) ===
-func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
+func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	lower := vals[0].(Vector)[0].F
 	upper := vals[1].(Vector)[0].F
 	inVec := vals[2].(Vector)
 
-	for _, sample := range inVec {
-		// Skip non-histogram samples.
-		if sample.H == nil {
-			continue
-		}
+	annos := enh.resetHistograms(inVec, args[2])
+
+	// Deal with the native histograms.
+	for _, sample := range enh.nativeHistogramSamples {
 		if !enh.enableDelayedNameRemoval {
 			sample.Metric = sample.Metric.DropMetricName()
 		}
@@ -1409,7 +1407,24 @@ func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 			DropName: true,
 		})
 	}
-	return enh.Out, nil
+
+	// Deal with classic histograms that have already been filtered for conflicting native histograms.
+	for _, mb := range enh.signatureToMetricWithBuckets {
+		if len(mb.buckets) == 0 {
+			continue
+		}
+		if !enh.enableDelayedNameRemoval {
+			mb.metric = mb.metric.DropMetricName()
+		}
+
+		enh.Out = append(enh.Out, Sample{
+			Metric:   mb.metric,
+			F:        BucketFraction(lower, upper, mb.buckets),
+			DropName: true,
+		})
+	}
+
+	return enh.Out, annos
 }
 
 // === histogram_quantile(k parser.ValueTypeScalar, Vector parser.ValueTypeVector) (Vector, Annotations) ===
@@ -1421,58 +1436,10 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	if math.IsNaN(q) || q < 0 || q > 1 {
 		annos.Add(annotations.NewInvalidQuantileWarning(q, args[0].PositionRange()))
 	}
+	annos.Merge(enh.resetHistograms(inVec, args[1]))
 
-	if enh.signatureToMetricWithBuckets == nil {
-		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
-	} else {
-		for _, v := range enh.signatureToMetricWithBuckets {
-			v.buckets = v.buckets[:0]
-		}
-	}
-
-	var histogramSamples []Sample
-
-	for _, sample := range inVec {
-		// We are only looking for classic buckets here. Remember
-		// the histograms for later treatment.
-		if sample.H != nil {
-			histogramSamples = append(histogramSamples, sample)
-			continue
-		}
-
-		upperBound, err := strconv.ParseFloat(
-			sample.Metric.Get(model.BucketLabel), 64,
-		)
-		if err != nil {
-			annos.Add(annotations.NewBadBucketLabelWarning(sample.Metric.Get(labels.MetricName), sample.Metric.Get(model.BucketLabel), args[1].PositionRange()))
-			continue
-		}
-		enh.lblBuf = sample.Metric.BytesWithoutLabels(enh.lblBuf, labels.BucketLabel)
-		mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]
-		if !ok {
-			sample.Metric = labels.NewBuilder(sample.Metric).
-				Del(excludedLabels...).
-				Labels()
-			mb = &metricWithBuckets{sample.Metric, nil}
-			enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb
-		}
-		mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F})
-	}
-
-	// Now deal with the native histograms.
-	for _, sample := range histogramSamples {
-		// We have to reconstruct the exact same signature as above for
-		// a classic histogram, just ignoring any le label.
-		enh.lblBuf = sample.Metric.Bytes(enh.lblBuf)
-		if mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]; ok && len(mb.buckets) > 0 {
-			// At this data point, we have classic histogram
-			// buckets and a native histogram with the same name and
-			// labels. Do not evaluate anything.
-			annos.Add(annotations.NewMixedClassicNativeHistogramsWarning(sample.Metric.Get(labels.MetricName), args[1].PositionRange()))
-			delete(enh.signatureToMetricWithBuckets, string(enh.lblBuf))
-			continue
-		}
-
+	// Deal with the native histograms.
+	for _, sample := range enh.nativeHistogramSamples {
 		if !enh.enableDelayedNameRemoval {
 			sample.Metric = sample.Metric.DropMetricName()
 		}
@@ -1483,7 +1450,7 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 		})
 	}
 
-	// Now do classic histograms that have already been filtered for conflicting native histograms.
+	// Deal with classic histograms that have already been filtered for conflicting native histograms.
 	for _, mb := range enh.signatureToMetricWithBuckets {
 		if len(mb.buckets) > 0 {
 			res, forcedMonotonicity, _ := BucketQuantile(q, mb.buckets)
promql/promqltest/testdata/histograms.test (28 changed lines)
@@ -104,15 +104,43 @@ eval instant at 50m histogram_stdvar(testhistogram3)
    {start="negative"} 17.495112615949154

# Test histogram_fraction.
#
eval instant at 50m histogram_fraction(0, 4, testhistogram2)
    {} 0.6666666666666666

eval instant at 50m histogram_fraction(0, 4, testhistogram2_bucket)
    {} 0.6666666666666666

eval instant at 50m histogram_fraction(0, 6, testhistogram2)
    {} 1

eval instant at 50m histogram_fraction(0, 6, testhistogram2_bucket)
    {} 1

eval instant at 50m histogram_fraction(0, 3.5, testhistogram2)
    {} 0.5833333333333334

eval instant at 50m histogram_fraction(0, 3.5, testhistogram2_bucket)
    {} 0.5833333333333334

eval instant at 50m histogram_fraction(0, 0.2, testhistogram3)
    {start="positive"} 0.6363636363636364
    {start="negative"} 0

eval instant at 50m histogram_fraction(0, 0.2, testhistogram3_bucket)
    {start="positive"} 0.6363636363636364
    {start="negative"} 0

eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[10m]))
    {start="positive"} 0.6363636363636364
    {start="negative"} 0

eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3_bucket[10m]))
    {start="positive"} 0.6363636363636364
    {start="negative"} 0

# In the classic histogram, we can access the corresponding bucket (if
# it exists) and divide by the count to get the same result.
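The eval demonstrating this falls outside the hunk; a hedged sketch of such a query (assuming a bucket with boundary 0.2 and a matching _count series exist for testhistogram3) would be:

    testhistogram3_bucket{le="0.2"} / ignoring(le) testhistogram3_count

which divides the cumulative bucket count by the total count, the direct-bucket calculation the comment above refers to.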
@@ -448,6 +448,84 @@ func HistogramFraction(lower, upper float64, h *histogram.FloatHistogram) float64 {
 	return (upperRank - lowerRank) / h.Count
 }
 
+// BucketFraction is a version of HistogramFraction for classic histograms.
+func BucketFraction(lower, upper float64, buckets Buckets) float64 {
+	slices.SortFunc(buckets, func(a, b Bucket) int {
+		// We don't expect the bucket boundary to be a NaN.
+		if a.UpperBound < b.UpperBound {
+			return -1
+		}
+		if a.UpperBound > b.UpperBound {
+			return +1
+		}
+		return 0
+	})
+	if !math.IsInf(buckets[len(buckets)-1].UpperBound, +1) {
+		return math.NaN()
+	}
+	buckets = coalesceBuckets(buckets)
+
+	count := buckets[len(buckets)-1].Count
+	if count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
+		return math.NaN()
+	}
+	if lower >= upper {
+		return 0
+	}
+
+	var (
+		rank, lowerRank, upperRank float64
+		lowerSet, upperSet         bool
+	)
+	for i, b := range buckets {
+		lowerBound := math.Inf(-1)
+		if i > 0 {
+			lowerBound = buckets[i-1].UpperBound
+		}
+		upperBound := b.UpperBound
+
+		interpolateLinearly := func(v float64) float64 {
+			return rank + (b.Count-rank)*(v-lowerBound)/(upperBound-lowerBound)
+		}
+
+		if !lowerSet && lowerBound >= lower {
+			// We have hit the lower value at the lower bucket boundary.
+			lowerRank = rank
+			lowerSet = true
+		}
+		if !upperSet && lowerBound >= upper {
+			// We have hit the upper value at the lower bucket boundary.
+			upperRank = rank
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		if !lowerSet && lowerBound < lower && upperBound > lower {
+			// The lower value is in this bucket.
+			lowerRank = interpolateLinearly(lower)
+			lowerSet = true
+		}
+		if !upperSet && lowerBound < upper && upperBound > upper {
+			// The upper value is in this bucket.
+			upperRank = interpolateLinearly(upper)
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		rank = b.Count
+	}
+	if !lowerSet || lowerRank > count {
+		lowerRank = count
+	}
+	if !upperSet || upperRank > count {
+		upperRank = count
+	}
+
+	return (upperRank - lowerRank) / count
+}
+
 // coalesceBuckets merges buckets with the same upper bound.
 //
 // The input buckets must be sorted.
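One consequence of the +Inf check at the top of BucketFraction: if the +Inf bucket is missing from the input, the fraction is NaN. A hedged PromQL sketch (the metric name is illustrative):

    histogram_fraction(0, 0.5, rate(request_duration_seconds_bucket{le!="+Inf"}[5m]))

returns NaN for each series group, because filtering away the +Inf bucket removes the total observation count that the calculation needs.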