prometheus/promql/histogram_stats_iterator.go
beorn7 5010bd4bb1 promql: Optimize HistogramStatsIterator by disallowing integer histograms
The `HistogramStatsIterator` is only meant to be used within PromQL.
PromQL only ever uses float histograms. If `HistogramStatsIterator` is
capable of handling integer histograms, it will still be used, for
example by the `BufferedSeriesIterator`, which buffers samples and
will use an integer `Histogram` for it, if the underlying chunk is an
integer histogram chunk (which is common).

However, we can simply intercept the `Next` and `Seek` calls and
pretend to only ever be able to return float histograms. This has the
welcome side effect that we do not have to handle a mix of float and
integer histograms in the `HistogramStatsIterator` anymore.

With this commit, the `AtHistogram` call has been changed to panic so
that we ensure it is never called.

Benchmark differences between this and the previous commit:

name                                                                       old time/op    new time/op    delta
NativeHistograms/histogram_count_with_short_rate_interval-16                  837ms ± 3%     616ms ± 2%  -26.36%  (p=0.008 n=5+5)
NativeHistograms/histogram_count_with_long_rate_interval-16                   1.11s ± 1%     0.91s ± 3%  -17.75%  (p=0.008 n=5+5)
NativeHistogramsCustomBuckets/histogram_count_with_short_rate_interval-16     751ms ± 6%     581ms ± 1%  -22.63%  (p=0.008 n=5+5)
NativeHistogramsCustomBuckets/histogram_count_with_long_rate_interval-16      1.13s ±11%     0.85s ± 2%  -24.59%  (p=0.008 n=5+5)

name                                                                       old alloc/op   new alloc/op   delta
NativeHistograms/histogram_count_with_short_rate_interval-16                  531MB ± 0%     148MB ± 0%  -72.08%  (p=0.008 n=5+5)
NativeHistograms/histogram_count_with_long_rate_interval-16                   528MB ± 0%     145MB ± 0%  -72.60%  (p=0.016 n=5+4)
NativeHistogramsCustomBuckets/histogram_count_with_short_rate_interval-16     452MB ± 0%     145MB ± 0%  -67.97%  (p=0.016 n=5+4)
NativeHistogramsCustomBuckets/histogram_count_with_long_rate_interval-16      452MB ± 0%     141MB ± 0%  -68.70%  (p=0.016 n=5+4)

name                                                                       old allocs/op  new allocs/op  delta
NativeHistograms/histogram_count_with_short_rate_interval-16                  8.95M ± 0%     1.60M ± 0%  -82.15%  (p=0.008 n=5+5)
NativeHistograms/histogram_count_with_long_rate_interval-16                   8.84M ± 0%     1.49M ± 0%  -83.16%  (p=0.008 n=5+5)
NativeHistogramsCustomBuckets/histogram_count_with_short_rate_interval-16     5.96M ± 0%     1.57M ± 0%  -73.68%  (p=0.008 n=5+5)
NativeHistogramsCustomBuckets/histogram_count_with_long_rate_interval-16      5.86M ± 0%     1.46M ± 0%  -75.05%  (p=0.016 n=5+4)

Signed-off-by: beorn7 <beorn@grafana.com>
2025-09-04 14:06:19 +02:00

137 lines
4.7 KiB
Go

// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package promql
import (
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/tsdb/chunkenc"
)
// HistogramStatsIterator is an iterator that returns histogram objects that
// have only their sum and count values populated. The iterator handles counter
// reset detection internally and sets the counter reset hint accordingly in
// each returned histogram object. The Next and Seek methods of the iterator
// will never return ValHistogram, but ValFloatHistogram instead. Effectively,
// the iterator enforces conversion of (integer) Histogram to FloatHistogram.
// The AtHistogram method must not be called (and will panic).
type HistogramStatsIterator struct {
// Iterator is the wrapped underlying iterator. Methods that are not
// overridden by HistogramStatsIterator are promoted from it unchanged.
chunkenc.Iterator
// currentFH is the scratch histogram handed to the underlying iterator's
// AtFloatHistogram for reuse; it holds the full sample most recently read.
currentFH *histogram.FloatHistogram
// lastFH is a private copy of the most recent non-stale sample read via
// AtFloatHistogram. It is nil until such a sample has been read and is
// set back to nil by Reset. It is the reference for counter reset
// detection.
lastFH *histogram.FloatHistogram
}
// NewHistogramStatsIterator returns a HistogramStatsIterator wrapping it. The
// scratch histogram used for reading from it is allocated up front; no
// previous sample is recorded yet.
func NewHistogramStatsIterator(it chunkenc.Iterator) *HistogramStatsIterator {
	hsi := new(HistogramStatsIterator)
	hsi.Iterator = it
	hsi.currentFH = new(histogram.FloatHistogram)
	return hsi
}
// Reset points this iterator at a new underlying iterator. The scratch
// histogram is kept for reuse, while the remembered previous sample is
// dropped so that counter reset detection starts afresh.
func (hsi *HistogramStatsIterator) Reset(it chunkenc.Iterator) {
	hsi.Iterator, hsi.lastFH = it, nil
}
// Next advances the underlying iterator and passes its result through, except
// that ValHistogram is reported as ValFloatHistogram.
func (hsi *HistogramStatsIterator) Next() chunkenc.ValueType {
	switch vt := hsi.Iterator.Next(); vt {
	case chunkenc.ValHistogram:
		return chunkenc.ValFloatHistogram
	default:
		return vt
	}
}
// Seek forwards to the underlying iterator's Seek with t and passes its result
// through, except that ValHistogram is reported as ValFloatHistogram.
func (hsi *HistogramStatsIterator) Seek(t int64) chunkenc.ValueType {
	if vt := hsi.Iterator.Seek(t); vt != chunkenc.ValHistogram {
		return vt
	}
	return chunkenc.ValFloatHistogram
}
// AtHistogram must never be called; it panics unconditionally. Next and Seek
// of this iterator report ValFloatHistogram in place of ValHistogram, so
// samples are meant to be read with AtFloatHistogram instead.
func (*HistogramStatsIterator) AtHistogram(*histogram.Histogram) (int64, *histogram.Histogram) {
panic("HistogramStatsIterator.AtHistogram must never be called")
}
// AtFloatHistogram returns the timestamp of the current sample together with a
// float histogram that carries only the counter reset hint, count, and sum of
// that sample.
//
// If fh is non-nil, it is overwritten with the result and returned; otherwise
// a new histogram is allocated. A stale sample (stale NaN sum) always yields a
// newly allocated histogram holding just that sum, and it is not remembered as
// the previous sample.
//
// Counter reset detection happens on the fly: the returned hint is explicit
// (not UnknownCounterReset) if the previous sample has been accessed with the
// same iterator.
func (hsi *HistogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
	ts, cur := hsi.Iterator.AtFloatHistogram(hsi.currentFH)
	hsi.currentFH = cur

	if value.IsStaleNaN(cur.Sum) {
		return ts, &histogram.FloatHistogram{Sum: cur.Sum}
	}

	// The hint must be computed before the current sample replaces the
	// remembered previous one.
	hint := hsi.getFloatResetHint(cur.CounterResetHint)

	if fh != nil {
		// Go through CopyTo so that every field of the caller's histogram
		// not covered by the stats is overwritten as well.
		stats := histogram.FloatHistogram{
			CounterResetHint: hint,
			Count:            cur.Count,
			Sum:              cur.Sum,
		}
		stats.CopyTo(fh)
		hsi.setLastFH(cur)
		return ts, fh
	}

	hsi.setLastFH(cur)
	return ts, &histogram.FloatHistogram{
		CounterResetHint: hint,
		Count:            cur.Count,
		Sum:              cur.Sum,
	}
}
// setLastFH remembers fh as the previous sample. The stored histogram is
// always a private copy: it is allocated on first use and overwritten in place
// afterwards.
func (hsi *HistogramStatsIterator) setLastFH(fh *histogram.FloatHistogram) {
	if hsi.lastFH != nil {
		fh.CopyTo(hsi.lastFH)
		return
	}
	hsi.lastFH = fh.Copy()
}
// getFloatResetHint determines the counter reset hint to report for the
// current sample. A hint other than UnknownCounterReset is passed through
// unchanged. Otherwise, the current sample is compared against the remembered
// previous sample, if there is one.
func (hsi *HistogramStatsIterator) getFloatResetHint(hint histogram.CounterResetHint) histogram.CounterResetHint {
	switch {
	case hint != histogram.UnknownCounterReset:
		return hint
	case hsi.lastFH == nil:
		// Without a previous sample, nothing can be said about a counter
		// reset. Reporting "unknown" generally makes the PromQL engine run
		// its own explicit detection, which is less reliable here because
		// the engine will not see the buckets. However, wherever counter
		// reset detection is relevant, the series can be assumed to have
		// been iterated through, so this case should not be reached in the
		// first place.
		return histogram.UnknownCounterReset
	case hsi.currentFH.DetectReset(hsi.lastFH):
		return histogram.CounterReset
	default:
		return histogram.NotCounterReset
	}
}