mirror of https://github.com/prometheus/prometheus.git
promql: histogram_fraction for bucket histograms (#16095)
* promql: histogram_fraction for bucket histograms

  This PR extends the histogram_fraction function to also work with classic
  bucket histograms. This is beneficial because it allows expressions like
  sum(increase(my_bucket{le="0.5"}[10m]))/sum(increase(my_total[10m])) to be
  written without knowing the actual values of the "le" label, easing the
  transition to native histograms later on. It also feels natural since
  histogram_quantile also can deal with classic histograms.

  Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>

* promql: histogram_fraction for bucket histograms

* Add documentation and reduce code duplication

* Fix a bug in linear interpolation between bucket boundaries

* Add more PromQL tests

  Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>

* Update docs/querying/functions.md

  Co-authored-by: Björn Rabenstein <github@rabenste.in>
  Signed-off-by: Michael Hoffmann <mhoffm@posteo.de>

---------

Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>
Signed-off-by: Michael Hoffmann <mhoffm@posteo.de>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
Parent: d9c0ad1e61
Commit: d6d9f97bac
@@ -253,10 +253,23 @@ histogram samples:

 ## `histogram_fraction()`

-`histogram_fraction(lower scalar, upper scalar, v instant-vector)` returns the
+`histogram_fraction(lower scalar, upper scalar, b instant-vector)` returns the
 estimated fraction of observations between the provided lower and upper values
-for each histogram sample in `v`. Float samples are ignored and do not show up
-in the returned vector.
+for each classic or native histogram contained in `b`. Float samples in `b` are
+considered the counts of observations in each bucket of one or more classic
+histograms, while native histogram samples in `b` are treated each individually
+as a separate histogram. This works in the same way as for `histogram_quantile()`.
+(See there for more details.)
+
+If the provided lower and upper values do not coincide with bucket boundaries,
+the calculated fraction is an estimate, using the same interpolation method as for
+`histogram_quantile()`. (See there for more details.) Especially with classic
+histograms, it is easy to accidentally pick lower or upper values that are very
+far away from any bucket boundary, leading to large margins of error. Rather than
+using `histogram_fraction()` with classic histograms, it is often a more robust approach
+to directly act on the bucket series when calculating fractions. See the
+[calculation of the Apdex score](https://prometheus.io/docs/practices/histograms/#apdex-score)
+as a typical example.

 For example, the following expression calculates the fraction of HTTP requests
 over the last hour that took 200ms or less:
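The interpolation mentioned above can be summarized as follows. This is a sketch derived from the `BucketFraction` helper added later in this commit, not part of the documentation text: if a boundary value $v$ falls into a classic bucket $(l, u]$ with cumulative observation counts $r_l$ at $l$ and $r_u$ at $u$, its rank is estimated linearly, and the fraction between `lower` $=a$ and `upper` $=b$ is the difference of the two ranks divided by the total count $N$:

$$
\operatorname{rank}(v) \approx r_l + (r_u - r_l)\,\frac{v - l}{u - l},
\qquad
\mathrm{fraction}(a, b) \approx \frac{\operatorname{rank}(b) - \operatorname{rank}(a)}{N}.
$$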
@@ -280,8 +293,8 @@ feature inclusive upper boundaries and exclusive lower boundaries for positive
 values, and vice versa for negative values.) Without a precise alignment of
 boundaries, the function uses interpolation to estimate the fraction. With the
 resulting uncertainty, it becomes irrelevant if the boundaries are inclusive or
-exclusive. The interpolation method is the same as the one used for
-`histogram_quantile()`. See there for more details.
+exclusive.

 ## `histogram_quantile()`

@@ -1137,8 +1137,9 @@ type EvalNodeHelper struct {
 	Out Vector

 	// Caches.
-	// funcHistogramQuantile for classic histograms.
+	// funcHistogramQuantile and funcHistogramFraction for classic histograms.
 	signatureToMetricWithBuckets map[string]*metricWithBuckets
+	nativeHistogramSamples       []Sample

 	lb     *labels.Builder
 	lblBuf []byte
@@ -1161,6 +1162,62 @@ func (enh *EvalNodeHelper) resetBuilder(lbls labels.Labels) {
 	}
 }

+// resetHistograms prepares the histogram caches by splitting the given vector into native and classic histograms.
+func (enh *EvalNodeHelper) resetHistograms(inVec Vector, arg parser.Expr) annotations.Annotations {
+	var annos annotations.Annotations
+
+	if enh.signatureToMetricWithBuckets == nil {
+		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
+	} else {
+		for _, v := range enh.signatureToMetricWithBuckets {
+			v.buckets = v.buckets[:0]
+		}
+	}
+	enh.nativeHistogramSamples = enh.nativeHistogramSamples[:0]
+
+	for _, sample := range inVec {
+		// We are only looking for classic buckets here. Remember
+		// the histograms for later treatment.
+		if sample.H != nil {
+			enh.nativeHistogramSamples = append(enh.nativeHistogramSamples, sample)
+			continue
+		}
+
+		upperBound, err := strconv.ParseFloat(
+			sample.Metric.Get(model.BucketLabel), 64,
+		)
+		if err != nil {
+			annos.Add(annotations.NewBadBucketLabelWarning(sample.Metric.Get(labels.MetricName), sample.Metric.Get(model.BucketLabel), arg.PositionRange()))
+			continue
+		}
+		enh.lblBuf = sample.Metric.BytesWithoutLabels(enh.lblBuf, labels.BucketLabel)
+		mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]
+		if !ok {
+			sample.Metric = labels.NewBuilder(sample.Metric).
+				Del(excludedLabels...).
+				Labels()
+			mb = &metricWithBuckets{sample.Metric, nil}
+			enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb
+		}
+		mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F})
+	}
+
+	for _, sample := range enh.nativeHistogramSamples {
+		// We have to reconstruct the exact same signature as above for
+		// a classic histogram, just ignoring any le label.
+		enh.lblBuf = sample.Metric.Bytes(enh.lblBuf)
+		if mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]; ok && len(mb.buckets) > 0 {
+			// At this data point, we have classic histogram
+			// buckets and a native histogram with the same name and
+			// labels. Do not evaluate anything.
+			annos.Add(annotations.NewMixedClassicNativeHistogramsWarning(sample.Metric.Get(labels.MetricName), arg.PositionRange()))
+			delete(enh.signatureToMetricWithBuckets, string(enh.lblBuf))
+			continue
+		}
+	}
+	return annos
+}
+
 // rangeEval evaluates the given expressions, and then for each step calls
 // the given funcCall with the values computed for each expression at that
 // step. The return value is the combination into time series of all the
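For intuition about the signature matching in the two loops above, here is a minimal sketch with invented series names and labels. A native histogram sample carries no "le" label, so hashing all of its labels with `Bytes` yields the same byte signature that the classic-bucket branch computes with `BytesWithoutLabels(..., labels.BucketLabel)`, which is what lets the conflicting classic/native case be detected and warned about.

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
)

func main() {
	// Hypothetical series: the same metric name and labels exposed both as a
	// classic bucket series (with an "le" label) and as a native histogram.
	classicBucket := labels.FromStrings("__name__", "http_request_duration_seconds", "job", "api", "le", "0.5")
	native := labels.FromStrings("__name__", "http_request_duration_seconds", "job", "api")

	var buf []byte
	// Signature used for classic buckets: all labels except "le".
	sigClassic := string(classicBucket.BytesWithoutLabels(buf, labels.BucketLabel))
	// Signature used for native histogram samples: all labels (there is no "le").
	sigNative := string(native.Bytes(buf))

	// The signatures collide, which is the situation that triggers the
	// mixed classic/native histogram warning in resetHistograms.
	fmt.Println(sigClassic == sigNative)
}
```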
@@ -20,7 +20,6 @@ import (
 	"math"
 	"slices"
 	"sort"
-	"strconv"
 	"strings"
 	"time"

@@ -1390,16 +1389,15 @@ func funcHistogramStdVar(vals []parser.Value, _ parser.Expressions, enh *EvalNod
 }

 // === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) (Vector, Annotations) ===
-func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
+func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	lower := vals[0].(Vector)[0].F
 	upper := vals[1].(Vector)[0].F
 	inVec := vals[2].(Vector)

-	for _, sample := range inVec {
-		// Skip non-histogram samples.
-		if sample.H == nil {
-			continue
-		}
+	annos := enh.resetHistograms(inVec, args[2])
+
+	// Deal with the native histograms.
+	for _, sample := range enh.nativeHistogramSamples {
 		if !enh.enableDelayedNameRemoval {
 			sample.Metric = sample.Metric.DropMetricName()
 		}
@@ -1409,7 +1407,24 @@ func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalN
 			DropName: true,
 		})
 	}
-	return enh.Out, nil
+
+	// Deal with classic histograms that have already been filtered for conflicting native histograms.
+	for _, mb := range enh.signatureToMetricWithBuckets {
+		if len(mb.buckets) == 0 {
+			continue
+		}
+		if !enh.enableDelayedNameRemoval {
+			mb.metric = mb.metric.DropMetricName()
+		}
+
+		enh.Out = append(enh.Out, Sample{
+			Metric:   mb.metric,
+			F:        BucketFraction(lower, upper, mb.buckets),
+			DropName: true,
+		})
+	}
+
+	return enh.Out, annos
 }

 // === histogram_quantile(k parser.ValueTypeScalar, Vector parser.ValueTypeVector) (Vector, Annotations) ===
@@ -1421,58 +1436,10 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 	if math.IsNaN(q) || q < 0 || q > 1 {
 		annos.Add(annotations.NewInvalidQuantileWarning(q, args[0].PositionRange()))
 	}
+	annos.Merge(enh.resetHistograms(inVec, args[1]))

-	if enh.signatureToMetricWithBuckets == nil {
-		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
-	} else {
-		for _, v := range enh.signatureToMetricWithBuckets {
-			v.buckets = v.buckets[:0]
-		}
-	}
-
-	var histogramSamples []Sample
-
-	for _, sample := range inVec {
-		// We are only looking for classic buckets here. Remember
-		// the histograms for later treatment.
-		if sample.H != nil {
-			histogramSamples = append(histogramSamples, sample)
-			continue
-		}
-
-		upperBound, err := strconv.ParseFloat(
-			sample.Metric.Get(model.BucketLabel), 64,
-		)
-		if err != nil {
-			annos.Add(annotations.NewBadBucketLabelWarning(sample.Metric.Get(labels.MetricName), sample.Metric.Get(model.BucketLabel), args[1].PositionRange()))
-			continue
-		}
-		enh.lblBuf = sample.Metric.BytesWithoutLabels(enh.lblBuf, labels.BucketLabel)
-		mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]
-		if !ok {
-			sample.Metric = labels.NewBuilder(sample.Metric).
-				Del(excludedLabels...).
-				Labels()
-			mb = &metricWithBuckets{sample.Metric, nil}
-			enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb
-		}
-		mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F})
-	}
-
-	// Now deal with the native histograms.
-	for _, sample := range histogramSamples {
-		// We have to reconstruct the exact same signature as above for
-		// a classic histogram, just ignoring any le label.
-		enh.lblBuf = sample.Metric.Bytes(enh.lblBuf)
-		if mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]; ok && len(mb.buckets) > 0 {
-			// At this data point, we have classic histogram
-			// buckets and a native histogram with the same name and
-			// labels. Do not evaluate anything.
-			annos.Add(annotations.NewMixedClassicNativeHistogramsWarning(sample.Metric.Get(labels.MetricName), args[1].PositionRange()))
-			delete(enh.signatureToMetricWithBuckets, string(enh.lblBuf))
-			continue
-		}
-
+	// Deal with the native histograms.
+	for _, sample := range enh.nativeHistogramSamples {
 		if !enh.enableDelayedNameRemoval {
 			sample.Metric = sample.Metric.DropMetricName()
 		}
@@ -1483,7 +1450,7 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 		})
 	}

-	// Now do classic histograms that have already been filtered for conflicting native histograms.
+	// Deal with classic histograms that have already been filtered for conflicting native histograms.
 	for _, mb := range enh.signatureToMetricWithBuckets {
 		if len(mb.buckets) > 0 {
 			res, forcedMonotonicity, _ := BucketQuantile(q, mb.buckets)
promql/promqltest/testdata/histograms.test: 28 changes
@@ -104,15 +104,43 @@ eval instant at 50m histogram_stdvar(testhistogram3)
   {start="negative"} 17.495112615949154

 # Test histogram_fraction.
+#
+eval instant at 50m histogram_fraction(0, 4, testhistogram2)
+  {} 0.6666666666666666
+
+eval instant at 50m histogram_fraction(0, 4, testhistogram2_bucket)
+  {} 0.6666666666666666
+
+eval instant at 50m histogram_fraction(0, 6, testhistogram2)
+  {} 1
+
+eval instant at 50m histogram_fraction(0, 6, testhistogram2_bucket)
+  {} 1
+
+eval instant at 50m histogram_fraction(0, 3.5, testhistogram2)
+  {} 0.5833333333333334
+
+eval instant at 50m histogram_fraction(0, 3.5, testhistogram2_bucket)
+  {} 0.5833333333333334
+
+

 eval instant at 50m histogram_fraction(0, 0.2, testhistogram3)
   {start="positive"} 0.6363636363636364
   {start="negative"} 0

+eval instant at 50m histogram_fraction(0, 0.2, testhistogram3_bucket)
+  {start="positive"} 0.6363636363636364
+  {start="negative"} 0
+
 eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[10m]))
   {start="positive"} 0.6363636363636364
   {start="negative"} 0

+eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3_bucket[10m]))
+  {start="positive"} 0.6363636363636364
+  {start="negative"} 0
+
 # In the classic histogram, we can access the corresponding bucket (if
 # it exists) and divide by the count to get the same result.
@@ -448,6 +448,84 @@ func HistogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
 	return (upperRank - lowerRank) / h.Count
 }

+// BucketFraction is a version of HistogramFraction for classic histograms.
+func BucketFraction(lower, upper float64, buckets Buckets) float64 {
+	slices.SortFunc(buckets, func(a, b Bucket) int {
+		// We don't expect the bucket boundary to be a NaN.
+		if a.UpperBound < b.UpperBound {
+			return -1
+		}
+		if a.UpperBound > b.UpperBound {
+			return +1
+		}
+		return 0
+	})
+	if !math.IsInf(buckets[len(buckets)-1].UpperBound, +1) {
+		return math.NaN()
+	}
+	buckets = coalesceBuckets(buckets)
+
+	count := buckets[len(buckets)-1].Count
+	if count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
+		return math.NaN()
+	}
+	if lower >= upper {
+		return 0
+	}
+
+	var (
+		rank, lowerRank, upperRank float64
+		lowerSet, upperSet         bool
+	)
+	for i, b := range buckets {
+		lowerBound := math.Inf(-1)
+		if i > 0 {
+			lowerBound = buckets[i-1].UpperBound
+		}
+		upperBound := b.UpperBound
+
+		interpolateLinearly := func(v float64) float64 {
+			return rank + (b.Count-rank)*(v-lowerBound)/(upperBound-lowerBound)
+		}
+
+		if !lowerSet && lowerBound >= lower {
+			// We have hit the lower value at the lower bucket boundary.
+			lowerRank = rank
+			lowerSet = true
+		}
+		if !upperSet && lowerBound >= upper {
+			// We have hit the upper value at the lower bucket boundary.
+			upperRank = rank
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		if !lowerSet && lowerBound < lower && upperBound > lower {
+			// The lower value is in this bucket.
+			lowerRank = interpolateLinearly(lower)
+			lowerSet = true
+		}
+		if !upperSet && lowerBound < upper && upperBound > upper {
+			// The upper value is in this bucket.
+			upperRank = interpolateLinearly(upper)
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		rank = b.Count
+	}
+	if !lowerSet || lowerRank > count {
+		lowerRank = count
+	}
+	if !upperSet || upperRank > count {
+		upperRank = count
+	}
+
+	return (upperRank - lowerRank) / count
+}
+
 // coalesceBuckets merges buckets with the same upper bound.
 //
 // The input buckets must be sorted.
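As a quick illustration of how BucketFraction behaves, here is a minimal sketch that assumes a build of the promql package containing this commit; the bucket values are invented. As the code above shows, the function returns NaN when the last bucket is not the +Inf bucket, when the total count is zero, or when either boundary is NaN.

```go
package main

import (
	"fmt"
	"math"

	"github.com/prometheus/prometheus/promql"
)

func main() {
	// Invented classic-histogram buckets (cumulative counts):
	//   le="1"    -> 10 observations
	//   le="2"    -> 30 observations
	//   le="+Inf" -> 40 observations (total count)
	buckets := promql.Buckets{
		{UpperBound: 1, Count: 10},
		{UpperBound: 2, Count: 30},
		{UpperBound: math.Inf(+1), Count: 40},
	}

	// Fraction of observations in (1, 1.5]: the upper value 1.5 falls inside
	// the (1, 2] bucket, so its rank is interpolated linearly:
	//   10 + (30-10)*(1.5-1)/(2-1) = 20
	// giving (20 - 10) / 40 = 0.25.
	fmt.Println(promql.BucketFraction(1, 1.5, buckets))
}
```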