mirror of
				https://github.com/prometheus/prometheus.git
				synced 2025-10-31 16:31:03 +01:00 
			
		
		
		
	The bucket receiving math.MaxFloat64 observations now has math.MaxFloat64 as upper bound, while the bucket after it (the last possible bucket) has +Inf. This also adds a test for getBound and moves the getBound code to generic.go (where it should have been in the first place). Signed-off-by: beorn7 <beorn@grafana.com>
		
			
				
	
	
		
			387 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			387 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2015 The Prometheus Authors
 | ||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||
| // you may not use this file except in compliance with the License.
 | ||
| // You may obtain a copy of the License at
 | ||
| //
 | ||
| // http://www.apache.org/licenses/LICENSE-2.0
 | ||
| //
 | ||
| // Unless required by applicable law or agreed to in writing, software
 | ||
| // distributed under the License is distributed on an "AS IS" BASIS,
 | ||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||
| // See the License for the specific language governing permissions and
 | ||
| // limitations under the License.
 | ||
| 
 | ||
| package promql
 | ||
| 
 | ||
| import (
 | ||
| 	"math"
 | ||
| 	"sort"
 | ||
| 
 | ||
| 	"github.com/prometheus/prometheus/model/histogram"
 | ||
| 	"github.com/prometheus/prometheus/model/labels"
 | ||
| )
 | ||
| 
 | ||
| // Helpers to calculate quantiles.
 | ||
| 
 | ||
| // excludedLabels are the labels to exclude from signature calculation for
 | ||
| // quantiles.
 | ||
| var excludedLabels = []string{
 | ||
| 	labels.MetricName,
 | ||
| 	labels.BucketLabel,
 | ||
| }
 | ||
| 
 | ||
// bucket is a single histogram bucket as consumed by bucketQuantile:
// upperBound is the bucket's upper limit and count is the number of
// observations attributed to it. bucketQuantile treats count as cumulative,
// i.e. it includes the counts of all buckets with a lower upperBound.
type bucket struct {
	upperBound, count float64
}
 | ||
| 
 | ||
| // buckets implements sort.Interface.
 | ||
| type buckets []bucket
 | ||
| 
 | ||
| func (b buckets) Len() int           { return len(b) }
 | ||
| func (b buckets) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
 | ||
| func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound }
 | ||
| 
 | ||
// metricWithBuckets pairs a label set with a list of buckets.
// NOTE(review): presumably this groups the bucket series that belong to one
// histogram so a quantile can be computed per metric — confirm against callers.
type metricWithBuckets struct {
	metric  labels.Labels
	buckets buckets
}
 | ||
| 
 | ||
| // bucketQuantile calculates the quantile 'q' based on the given buckets. The
 | ||
| // buckets will be sorted by upperBound by this function (i.e. no sorting
 | ||
| // needed before calling this function). The quantile value is interpolated
 | ||
| // assuming a linear distribution within a bucket. However, if the quantile
 | ||
| // falls into the highest bucket, the upper bound of the 2nd highest bucket is
 | ||
| // returned. A natural lower bound of 0 is assumed if the upper bound of the
 | ||
| // lowest bucket is greater 0. In that case, interpolation in the lowest bucket
 | ||
| // happens linearly between 0 and the upper bound of the lowest bucket.
 | ||
| // However, if the lowest bucket has an upper bound less or equal 0, this upper
 | ||
| // bound is returned if the quantile falls into the lowest bucket.
 | ||
| //
 | ||
| // There are a number of special cases (once we have a way to report errors
 | ||
| // happening during evaluations of AST functions, we should report those
 | ||
| // explicitly):
 | ||
| //
 | ||
| // If 'buckets' has 0 observations, NaN is returned.
 | ||
| //
 | ||
| // If 'buckets' has fewer than 2 elements, NaN is returned.
 | ||
| //
 | ||
| // If the highest bucket is not +Inf, NaN is returned.
 | ||
| //
 | ||
| // If q==NaN, NaN is returned.
 | ||
| //
 | ||
| // If q<0, -Inf is returned.
 | ||
| //
 | ||
| // If q>1, +Inf is returned.
 | ||
| func bucketQuantile(q float64, buckets buckets) float64 {
 | ||
| 	if math.IsNaN(q) {
 | ||
| 		return math.NaN()
 | ||
| 	}
 | ||
| 	if q < 0 {
 | ||
| 		return math.Inf(-1)
 | ||
| 	}
 | ||
| 	if q > 1 {
 | ||
| 		return math.Inf(+1)
 | ||
| 	}
 | ||
| 	sort.Sort(buckets)
 | ||
| 	if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
 | ||
| 		return math.NaN()
 | ||
| 	}
 | ||
| 
 | ||
| 	buckets = coalesceBuckets(buckets)
 | ||
| 	ensureMonotonic(buckets)
 | ||
| 
 | ||
| 	if len(buckets) < 2 {
 | ||
| 		return math.NaN()
 | ||
| 	}
 | ||
| 	observations := buckets[len(buckets)-1].count
 | ||
| 	if observations == 0 {
 | ||
| 		return math.NaN()
 | ||
| 	}
 | ||
| 	rank := q * observations
 | ||
| 	b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
 | ||
| 
 | ||
| 	if b == len(buckets)-1 {
 | ||
| 		return buckets[len(buckets)-2].upperBound
 | ||
| 	}
 | ||
| 	if b == 0 && buckets[0].upperBound <= 0 {
 | ||
| 		return buckets[0].upperBound
 | ||
| 	}
 | ||
| 	var (
 | ||
| 		bucketStart float64
 | ||
| 		bucketEnd   = buckets[b].upperBound
 | ||
| 		count       = buckets[b].count
 | ||
| 	)
 | ||
| 	if b > 0 {
 | ||
| 		bucketStart = buckets[b-1].upperBound
 | ||
| 		count -= buckets[b-1].count
 | ||
| 		rank -= buckets[b-1].count
 | ||
| 	}
 | ||
| 	return bucketStart + (bucketEnd-bucketStart)*(rank/count)
 | ||
| }
 | ||
| 
 | ||
| // histogramQuantile calculates the quantile 'q' based on the given histogram.
 | ||
| //
 | ||
| // The quantile value is interpolated assuming a linear distribution within a
 | ||
| // bucket.
 | ||
| // TODO(beorn7): Find an interpolation method that is a better fit for
 | ||
| // exponential buckets (and think about configurable interpolation).
 | ||
| //
 | ||
| // A natural lower bound of 0 is assumed if the histogram has only positive
 | ||
| // buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has
 | ||
| // only negative buckets.
 | ||
| // TODO(beorn7): Come to terms if we want that.
 | ||
| //
 | ||
| // There are a number of special cases (once we have a way to report errors
 | ||
| // happening during evaluations of AST functions, we should report those
 | ||
| // explicitly):
 | ||
| //
 | ||
| // If the histogram has 0 observations, NaN is returned.
 | ||
| //
 | ||
| // If q<0, -Inf is returned.
 | ||
| //
 | ||
| // If q>1, +Inf is returned.
 | ||
| //
 | ||
| // If q is NaN, NaN is returned.
 | ||
| func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
 | ||
| 	if q < 0 {
 | ||
| 		return math.Inf(-1)
 | ||
| 	}
 | ||
| 	if q > 1 {
 | ||
| 		return math.Inf(+1)
 | ||
| 	}
 | ||
| 
 | ||
| 	if h.Count == 0 || math.IsNaN(q) {
 | ||
| 		return math.NaN()
 | ||
| 	}
 | ||
| 
 | ||
| 	var (
 | ||
| 		bucket histogram.Bucket[float64]
 | ||
| 		count  float64
 | ||
| 		it     = h.AllBucketIterator()
 | ||
| 		rank   = q * h.Count
 | ||
| 	)
 | ||
| 	for it.Next() {
 | ||
| 		bucket = it.At()
 | ||
| 		count += bucket.Count
 | ||
| 		if count >= rank {
 | ||
| 			break
 | ||
| 		}
 | ||
| 	}
 | ||
| 	if bucket.Lower < 0 && bucket.Upper > 0 {
 | ||
| 		if len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0 {
 | ||
| 			// The result is in the zero bucket and the histogram has only
 | ||
| 			// positive buckets. So we consider 0 to be the lower bound.
 | ||
| 			bucket.Lower = 0
 | ||
| 		} else if len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0 {
 | ||
| 			// The result is in the zero bucket and the histogram has only
 | ||
| 			// negative buckets. So we consider 0 to be the upper bound.
 | ||
| 			bucket.Upper = 0
 | ||
| 		}
 | ||
| 	}
 | ||
| 	// Due to numerical inaccuracies, we could end up with a higher count
 | ||
| 	// than h.Count. Thus, make sure count is never higher than h.Count.
 | ||
| 	if count > h.Count {
 | ||
| 		count = h.Count
 | ||
| 	}
 | ||
| 	// We could have hit the highest bucket without even reaching the rank
 | ||
| 	// (this should only happen if the histogram contains observations of
 | ||
| 	// the value NaN), in which case we simply return the upper limit of the
 | ||
| 	// highest explicit bucket.
 | ||
| 	if count < rank {
 | ||
| 		return bucket.Upper
 | ||
| 	}
 | ||
| 
 | ||
| 	rank -= count - bucket.Count
 | ||
| 	// TODO(codesome): Use a better estimation than linear.
 | ||
| 	return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
 | ||
| }
 | ||
| 
 | ||
| // histogramFraction calculates the fraction of observations between the
 | ||
| // provided lower and upper bounds, based on the provided histogram.
 | ||
| //
 | ||
| // histogramFraction is in a certain way the inverse of histogramQuantile.  If
 | ||
| // histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h)
 | ||
| // returns 0.9.
 | ||
| //
 | ||
| // The same notes (and TODOs) with regard to interpolation and assumptions about
 | ||
| // the zero bucket boundaries apply as for histogramQuantile.
 | ||
| //
 | ||
| // Whether either boundary is inclusive or exclusive doesn’t actually matter as
 | ||
| // long as interpolation has to be performed anyway. In the case of a boundary
 | ||
| // coinciding with a bucket boundary, the inclusive or exclusive nature of the
 | ||
| // boundary determines the exact behavior of the threshold. With the current
 | ||
| // implementation, that means that lower is exclusive for positive values and
 | ||
| // inclusive for negative values, while upper is inclusive for positive values
 | ||
| // and exclusive for negative values.
 | ||
| //
 | ||
| // Special cases:
 | ||
| //
 | ||
| // If the histogram has 0 observations, NaN is returned.
 | ||
| //
 | ||
| // Use a lower bound of -Inf to get the fraction of all observations below the
 | ||
| // upper bound.
 | ||
| //
 | ||
| // Use an upper bound of +Inf to get the fraction of all observations above the
 | ||
| // lower bound.
 | ||
| //
 | ||
| // If lower or upper is NaN, NaN is returned.
 | ||
| //
 | ||
| // If lower >= upper and the histogram has at least 1 observation, zero is returned.
 | ||
| func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float64 {
 | ||
| 	if h.Count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
 | ||
| 		return math.NaN()
 | ||
| 	}
 | ||
| 	if lower >= upper {
 | ||
| 		return 0
 | ||
| 	}
 | ||
| 
 | ||
| 	var (
 | ||
| 		rank, lowerRank, upperRank float64
 | ||
| 		lowerSet, upperSet         bool
 | ||
| 		it                         = h.AllBucketIterator()
 | ||
| 	)
 | ||
| 	for it.Next() {
 | ||
| 		b := it.At()
 | ||
| 		if b.Lower < 0 && b.Upper > 0 {
 | ||
| 			if len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0 {
 | ||
| 				// This is the zero bucket and the histogram has only
 | ||
| 				// positive buckets. So we consider 0 to be the lower
 | ||
| 				// bound.
 | ||
| 				b.Lower = 0
 | ||
| 			} else if len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0 {
 | ||
| 				// This is in the zero bucket and the histogram has only
 | ||
| 				// negative buckets. So we consider 0 to be the upper
 | ||
| 				// bound.
 | ||
| 				b.Upper = 0
 | ||
| 			}
 | ||
| 		}
 | ||
| 		if !lowerSet && b.Lower >= lower {
 | ||
| 			lowerRank = rank
 | ||
| 			lowerSet = true
 | ||
| 		}
 | ||
| 		if !upperSet && b.Lower >= upper {
 | ||
| 			upperRank = rank
 | ||
| 			upperSet = true
 | ||
| 		}
 | ||
| 		if lowerSet && upperSet {
 | ||
| 			break
 | ||
| 		}
 | ||
| 		if !lowerSet && b.Lower < lower && b.Upper > lower {
 | ||
| 			lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower)
 | ||
| 			lowerSet = true
 | ||
| 		}
 | ||
| 		if !upperSet && b.Lower < upper && b.Upper > upper {
 | ||
| 			upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower)
 | ||
| 			upperSet = true
 | ||
| 		}
 | ||
| 		if lowerSet && upperSet {
 | ||
| 			break
 | ||
| 		}
 | ||
| 		rank += b.Count
 | ||
| 	}
 | ||
| 	if !lowerSet || lowerRank > h.Count {
 | ||
| 		lowerRank = h.Count
 | ||
| 	}
 | ||
| 	if !upperSet || upperRank > h.Count {
 | ||
| 		upperRank = h.Count
 | ||
| 	}
 | ||
| 
 | ||
| 	return (upperRank - lowerRank) / h.Count
 | ||
| }
 | ||
| 
 | ||
| // coalesceBuckets merges buckets with the same upper bound.
 | ||
| //
 | ||
| // The input buckets must be sorted.
 | ||
| func coalesceBuckets(buckets buckets) buckets {
 | ||
| 	last := buckets[0]
 | ||
| 	i := 0
 | ||
| 	for _, b := range buckets[1:] {
 | ||
| 		if b.upperBound == last.upperBound {
 | ||
| 			last.count += b.count
 | ||
| 		} else {
 | ||
| 			buckets[i] = last
 | ||
| 			last = b
 | ||
| 			i++
 | ||
| 		}
 | ||
| 	}
 | ||
| 	buckets[i] = last
 | ||
| 	return buckets[:i+1]
 | ||
| }
 | ||
| 
 | ||
| // The assumption that bucket counts increase monotonically with increasing
 | ||
| // upperBound may be violated during:
 | ||
| //
 | ||
| //   * Recording rule evaluation of histogram_quantile, especially when rate()
 | ||
| //      has been applied to the underlying bucket timeseries.
 | ||
| //   * Evaluation of histogram_quantile computed over federated bucket
 | ||
| //      timeseries, especially when rate() has been applied.
 | ||
| //
 | ||
| // This is because scraped data is not made available to rule evaluation or
 | ||
| // federation atomically, so some buckets are computed with data from the
 | ||
| // most recent scrapes, but the other buckets are missing data from the most
 | ||
| // recent scrape.
 | ||
| //
 | ||
| // Monotonicity is usually guaranteed because if a bucket with upper bound
 | ||
| // u1 has count c1, then any bucket with a higher upper bound u > u1 must
 | ||
| // have counted all c1 observations and perhaps more, so that c  >= c1.
 | ||
| //
 | ||
| // Randomly interspersed partial sampling breaks that guarantee, and rate()
 | ||
| // exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
 | ||
| // 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The
 | ||
| // monotonicity is broken. It is exacerbated by rate() because under normal
 | ||
| // operation, cumulative counting of buckets will cause the bucket counts to
 | ||
| // diverge such that small differences from missing samples are not a problem.
 | ||
| // rate() removes this divergence.)
 | ||
| //
 | ||
| // bucketQuantile depends on that monotonicity to do a binary search for the
 | ||
| // bucket with the φ-quantile count, so breaking the monotonicity
 | ||
| // guarantee causes bucketQuantile() to return undefined (nonsense) results.
 | ||
| //
 | ||
| // As a somewhat hacky solution until ingestion is atomic per scrape, we
 | ||
| // calculate the "envelope" of the histogram buckets, essentially removing
 | ||
| // any decreases in the count between successive buckets.
 | ||
| 
 | ||
| func ensureMonotonic(buckets buckets) {
 | ||
| 	max := buckets[0].count
 | ||
| 	for i := 1; i < len(buckets); i++ {
 | ||
| 		switch {
 | ||
| 		case buckets[i].count > max:
 | ||
| 			max = buckets[i].count
 | ||
| 		case buckets[i].count < max:
 | ||
| 			buckets[i].count = max
 | ||
| 		}
 | ||
| 	}
 | ||
| }
 | ||
| 
 | ||
| // quantile calculates the given quantile of a vector of samples.
 | ||
| //
 | ||
| // The Vector will be sorted.
 | ||
| // If 'values' has zero elements, NaN is returned.
 | ||
| // If q==NaN, NaN is returned.
 | ||
| // If q<0, -Inf is returned.
 | ||
| // If q>1, +Inf is returned.
 | ||
| func quantile(q float64, values vectorByValueHeap) float64 {
 | ||
| 	if len(values) == 0 || math.IsNaN(q) {
 | ||
| 		return math.NaN()
 | ||
| 	}
 | ||
| 	if q < 0 {
 | ||
| 		return math.Inf(-1)
 | ||
| 	}
 | ||
| 	if q > 1 {
 | ||
| 		return math.Inf(+1)
 | ||
| 	}
 | ||
| 	sort.Sort(values)
 | ||
| 
 | ||
| 	n := float64(len(values))
 | ||
| 	// When the quantile lies between two samples,
 | ||
| 	// we use a weighted average of the two samples.
 | ||
| 	rank := q * (n - 1)
 | ||
| 
 | ||
| 	lowerIndex := math.Max(0, math.Floor(rank))
 | ||
| 	upperIndex := math.Min(n-1, lowerIndex+1)
 | ||
| 
 | ||
| 	weight := rank - math.Floor(rank)
 | ||
| 	return values[int(lowerIndex)].V*(1-weight) + values[int(upperIndex)].V*weight
 | ||
| }
 |