model/histogram: Make histogram bucket iterators more robust

Currently, iterating over histogram buckets can panic if the spans are
not consistent with the buckets. We aim for validating histograms upon
ingestion, but there might still be data corruptions on disk that
could trigger the panic. While data corruption on disk is really bad
and will lead to all kind of weirdness, we should still avoid
panic'ing.

Note, though, that chunks are secured by checksums, so the corruptions
won't realistically happen because of disk faults, but more likely
because a chunk was generated in a faulty way in the first place, by
a software bug or even maliciously.

This commit prevents panics in the situation where there are fewer
buckets than described by the spans. Note that the missing buckets
will simply not be iterated over. There is no signalling of this
problem. We might still consider this separately, but for now, I would
say that this kind of corruption is exceedingly rare and doesn't
deserve special treatment (which will add a whole lot of complexity to
the code).

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2025-11-18 22:59:46 +01:00
parent 1174b0ce4f
commit 2dfc324821
4 changed files with 393 additions and 41 deletions

View File

@ -1050,16 +1050,21 @@ func (i *floatBucketIterator) Next() bool {
if i.spansIdx >= len(i.spans) {
return false
}
span := i.spans[i.spansIdx]
if i.schema == i.targetSchema {
// Fast path for the common case.
span := i.spans[i.spansIdx]
if i.bucketsIdx == 0 {
// Seed origIdx for the first bucket.
i.currIdx = span.Offset
} else {
i.currIdx++
}
if i.bucketsIdx >= len(i.buckets) {
// This protects against index out of range panic, which
// can only happen with an invalid histogram.
return false
}
for i.idxInSpan >= span.Length {
// We have exhausted the current span and have to find a new
@ -1080,7 +1085,6 @@ func (i *floatBucketIterator) Next() bool {
// Copy all of these into local variables so that we can forward to the
// next bucket and then roll back if needed.
origIdx, spansIdx, idxInSpan := i.origIdx, i.spansIdx, i.idxInSpan
span := i.spans[spansIdx]
firstPass := true
i.currCount = 0
@ -1092,6 +1096,14 @@ func (i *floatBucketIterator) Next() bool {
} else {
origIdx++
}
if i.bucketsIdx >= len(i.buckets) {
// This protects against index out of range panic, which
// can only happen with an invalid histogram.
if firstPass {
return false
}
break mergeLoop
}
for idxInSpan >= span.Length {
// We have exhausted the current span and have to find a new
// one. We even handle pathologic spans of length 0 here.
@ -1152,6 +1164,11 @@ func (i *reverseFloatBucketIterator) Next() bool {
// We have exhausted the current span and have to find a new
// one. We'll even handle pathologic spans of length 0.
i.spansIdx--
if i.spansIdx < 0 {
// This protects against index out of range panic, which
// can only happen with an invalid histogram.
return false
}
i.idxInSpan = int32(i.spans[i.spansIdx].Length) - 1
i.currIdx -= i.spans[i.spansIdx+1].Offset
}

View File

@ -3344,6 +3344,84 @@ func TestAllReverseFloatBucketIterator(t *testing.T) {
includeZero: true,
includePos: true,
},
{
h: FloatHistogram{
Count: 405,
ZeroCount: 102,
ZeroThreshold: 0.001,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
// Spans expect one more bucket than listed
// here. We mostly want to make sure here that
// no panic happens. Data is invalid anyway, so
// there is no real "correct" data anymore.
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
// Spans expect one more bucket than listed
// here. We mostly want to make sure here that
// no panic happens. Data is invalid anyway, so
// there is no real "correct" data anymore.
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235},
},
includeNeg: true,
includeZero: true,
includePos: true,
},
{
h: FloatHistogram{
Count: 447,
ZeroCount: 42,
ZeroThreshold: 0.6, // Within the bucket closest to zero.
Sum: 1008.4,
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 0},
{Offset: 3, Length: 3},
{Offset: 3, Length: 0},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
// One more bucket listed here than expected by
// the spans. We mostly want to make sure here
// that no panic happens. Data is invalid
// anyway, so there is no real "correct" data
// anymore.
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33, 42},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 1, Length: 0},
{Offset: 3, Length: 0},
{Offset: 3, Length: 4},
{Offset: 2, Length: 0},
{Offset: 5, Length: 3},
},
// One more bucket listed here than expected by
// the spans. We mostly want to make sure here
// that no panic happens. Data is invalid
// anyway, so there is no real "correct" data
// anymore.
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33, 42},
},
includeNeg: true,
includeZero: true,
includePos: true,
},
}
for i, c := range cases {
@ -3391,50 +3469,217 @@ func TestAllReverseFloatBucketIterator(t *testing.T) {
}
func TestFloatBucketIteratorTargetSchema(t *testing.T) {
h := FloatHistogram{
Count: 405,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 3},
{Offset: 2, Length: 3},
cases := map[string]struct {
h FloatHistogram
expPositiveBuckets []Bucket[float64]
expNegativeBuckets []Bucket[float64]
}{
"regular": {
h: FloatHistogram{
Count: 405,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 3},
{Offset: 2, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 7, Length: 4},
{Offset: 1, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
},
expPositiveBuckets: []Bucket[float64]{
{Lower: 0.25, Upper: 1, LowerInclusive: false, UpperInclusive: true, Count: 100, Index: 0},
{Lower: 1, Upper: 4, LowerInclusive: false, UpperInclusive: true, Count: 522, Index: 1},
{Lower: 4, Upper: 16, LowerInclusive: false, UpperInclusive: true, Count: 68, Index: 2},
{Lower: 16, Upper: 64, LowerInclusive: false, UpperInclusive: true, Count: 322, Index: 3},
},
expNegativeBuckets: []Bucket[float64]{
{Lower: -1, Upper: -0.25, LowerInclusive: true, UpperInclusive: false, Count: 10, Index: 0},
{Lower: -4, Upper: -1, LowerInclusive: true, UpperInclusive: false, Count: 1264, Index: 1},
{Lower: -64, Upper: -16, LowerInclusive: true, UpperInclusive: false, Count: 184, Index: 3},
{Lower: -256, Upper: -64, LowerInclusive: true, UpperInclusive: false, Count: 791, Index: 4},
{Lower: -1024, Upper: -256, LowerInclusive: true, UpperInclusive: false, Count: 33, Index: 5},
},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 7, Length: 4},
{Offset: 1, Length: 3},
"missing buckets": {
// One fewer bucket than expected based on spans. This
// can only happen with invalid histograms. We still
// want to handle it gracefully, essentially by
// considering the missing bucket as empty.
h: FloatHistogram{
Count: 405,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 3},
{Offset: 2, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 7, Length: 4},
{Offset: 1, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235},
},
expPositiveBuckets: []Bucket[float64]{
{Lower: 0.25, Upper: 1, LowerInclusive: false, UpperInclusive: true, Count: 100, Index: 0},
{Lower: 1, Upper: 4, LowerInclusive: false, UpperInclusive: true, Count: 522, Index: 1},
{Lower: 4, Upper: 16, LowerInclusive: false, UpperInclusive: true, Count: 68, Index: 2},
{Lower: 16, Upper: 64, LowerInclusive: false, UpperInclusive: true, Count: 289, Index: 3},
},
expNegativeBuckets: []Bucket[float64]{
{Lower: -1, Upper: -0.25, LowerInclusive: true, UpperInclusive: false, Count: 10, Index: 0},
{Lower: -4, Upper: -1, LowerInclusive: true, UpperInclusive: false, Count: 1264, Index: 1},
{Lower: -64, Upper: -16, LowerInclusive: true, UpperInclusive: false, Count: 184, Index: 3},
{Lower: -256, Upper: -64, LowerInclusive: true, UpperInclusive: false, Count: 791, Index: 4},
},
},
"spurious bucket": {
// One more bucket than expected based on spans. This
// can only happen with invalid histograms. We still
// want to handle it gracefully, essentially by ignoring
// the spurious bucket.
h: FloatHistogram{
Count: 405,
Sum: 1008.4,
Schema: 1,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: 1, Length: 3},
{Offset: 2, Length: 3},
},
PositiveBuckets: []float64{100, 344, 123, 55, 3, 63, 2, 54, 235, 33, 42},
NegativeSpans: []Span{
{Offset: 0, Length: 3},
{Offset: 7, Length: 4},
{Offset: 1, Length: 3},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33, 42},
},
expPositiveBuckets: []Bucket[float64]{
{Lower: 0.25, Upper: 1, LowerInclusive: false, UpperInclusive: true, Count: 100, Index: 0},
{Lower: 1, Upper: 4, LowerInclusive: false, UpperInclusive: true, Count: 522, Index: 1},
{Lower: 4, Upper: 16, LowerInclusive: false, UpperInclusive: true, Count: 68, Index: 2},
{Lower: 16, Upper: 64, LowerInclusive: false, UpperInclusive: true, Count: 322, Index: 3},
},
expNegativeBuckets: []Bucket[float64]{
{Lower: -1, Upper: -0.25, LowerInclusive: true, UpperInclusive: false, Count: 10, Index: 0},
{Lower: -4, Upper: -1, LowerInclusive: true, UpperInclusive: false, Count: 1264, Index: 1},
{Lower: -64, Upper: -16, LowerInclusive: true, UpperInclusive: false, Count: 184, Index: 3},
{Lower: -256, Upper: -64, LowerInclusive: true, UpperInclusive: false, Count: 791, Index: 4},
{Lower: -1024, Upper: -256, LowerInclusive: true, UpperInclusive: false, Count: 33, Index: 5},
},
},
"no schema change": {
h: FloatHistogram{
Count: 405,
Sum: 1008.4,
Schema: -1,
PositiveSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []float64{100, 522, 68, 322},
NegativeSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
NegativeBuckets: []float64{100, 522, 68, 322},
},
expPositiveBuckets: []Bucket[float64]{
{Lower: 0.25, Upper: 1, LowerInclusive: false, UpperInclusive: true, Count: 100, Index: 0},
{Lower: 1, Upper: 4, LowerInclusive: false, UpperInclusive: true, Count: 522, Index: 1},
{Lower: 16, Upper: 64, LowerInclusive: false, UpperInclusive: true, Count: 68, Index: 3},
{Lower: 64, Upper: 256, LowerInclusive: false, UpperInclusive: true, Count: 322, Index: 4},
},
expNegativeBuckets: []Bucket[float64]{
{Lower: -1, Upper: -0.25, LowerInclusive: true, UpperInclusive: false, Count: 100, Index: 0},
{Lower: -4, Upper: -1, LowerInclusive: true, UpperInclusive: false, Count: 522, Index: 1},
{Lower: -64, Upper: -16, LowerInclusive: true, UpperInclusive: false, Count: 68, Index: 3},
{Lower: -256, Upper: -64, LowerInclusive: true, UpperInclusive: false, Count: 322, Index: 4},
},
},
"no schema change, missing bucket": {
h: FloatHistogram{
Count: 405,
Sum: 1008.4,
Schema: -1,
PositiveSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []float64{100, 522, 68},
NegativeSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
NegativeBuckets: []float64{100, 522, 68},
},
expPositiveBuckets: []Bucket[float64]{
{Lower: 0.25, Upper: 1, LowerInclusive: false, UpperInclusive: true, Count: 100, Index: 0},
{Lower: 1, Upper: 4, LowerInclusive: false, UpperInclusive: true, Count: 522, Index: 1},
{Lower: 16, Upper: 64, LowerInclusive: false, UpperInclusive: true, Count: 68, Index: 3},
},
expNegativeBuckets: []Bucket[float64]{
{Lower: -1, Upper: -0.25, LowerInclusive: true, UpperInclusive: false, Count: 100, Index: 0},
{Lower: -4, Upper: -1, LowerInclusive: true, UpperInclusive: false, Count: 522, Index: 1},
{Lower: -64, Upper: -16, LowerInclusive: true, UpperInclusive: false, Count: 68, Index: 3},
},
},
"no schema change, spurious bucket": {
h: FloatHistogram{
Count: 405,
Sum: 1008.4,
Schema: -1,
PositiveSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
PositiveBuckets: []float64{100, 522, 68, 322, 42},
NegativeSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
NegativeBuckets: []float64{100, 522, 68, 322, 42},
},
expPositiveBuckets: []Bucket[float64]{
{Lower: 0.25, Upper: 1, LowerInclusive: false, UpperInclusive: true, Count: 100, Index: 0},
{Lower: 1, Upper: 4, LowerInclusive: false, UpperInclusive: true, Count: 522, Index: 1},
{Lower: 16, Upper: 64, LowerInclusive: false, UpperInclusive: true, Count: 68, Index: 3},
{Lower: 64, Upper: 256, LowerInclusive: false, UpperInclusive: true, Count: 322, Index: 4},
},
expNegativeBuckets: []Bucket[float64]{
{Lower: -1, Upper: -0.25, LowerInclusive: true, UpperInclusive: false, Count: 100, Index: 0},
{Lower: -4, Upper: -1, LowerInclusive: true, UpperInclusive: false, Count: 522, Index: 1},
{Lower: -64, Upper: -16, LowerInclusive: true, UpperInclusive: false, Count: 68, Index: 3},
{Lower: -256, Upper: -64, LowerInclusive: true, UpperInclusive: false, Count: 322, Index: 4},
},
},
NegativeBuckets: []float64{10, 34, 1230, 54, 67, 63, 2, 554, 235, 33},
}
expPositiveBuckets := []Bucket[float64]{
{Lower: 0.25, Upper: 1, LowerInclusive: false, UpperInclusive: true, Count: 100, Index: 0},
{Lower: 1, Upper: 4, LowerInclusive: false, UpperInclusive: true, Count: 522, Index: 1},
{Lower: 4, Upper: 16, LowerInclusive: false, UpperInclusive: true, Count: 68, Index: 2},
{Lower: 16, Upper: 64, LowerInclusive: false, UpperInclusive: true, Count: 322, Index: 3},
}
expNegativeBuckets := []Bucket[float64]{
{Lower: -1, Upper: -0.25, LowerInclusive: true, UpperInclusive: false, Count: 10, Index: 0},
{Lower: -4, Upper: -1, LowerInclusive: true, UpperInclusive: false, Count: 1264, Index: 1},
{Lower: -64, Upper: -16, LowerInclusive: true, UpperInclusive: false, Count: 184, Index: 3},
{Lower: -256, Upper: -64, LowerInclusive: true, UpperInclusive: false, Count: 791, Index: 4},
{Lower: -1024, Upper: -256, LowerInclusive: true, UpperInclusive: false, Count: 33, Index: 5},
}
for tn, tc := range cases {
t.Run(tn, func(t *testing.T) {
it := tc.h.floatBucketIterator(true, 0, -1)
for i, b := range tc.expPositiveBuckets {
require.True(t, it.Next(), "positive iterator exhausted too early")
require.Equal(t, b, it.At(), "bucket %d", i)
}
require.False(t, it.Next(), "positive iterator not exhausted")
it := h.floatBucketIterator(true, 0, -1)
for i, b := range expPositiveBuckets {
require.True(t, it.Next(), "positive iterator exhausted too early")
require.Equal(t, b, it.At(), "bucket %d", i)
it = tc.h.floatBucketIterator(false, 0, -1)
for i, b := range tc.expNegativeBuckets {
require.True(t, it.Next(), "negative iterator exhausted too early")
require.Equal(t, b, it.At(), "bucket %d", i)
}
require.False(t, it.Next(), "negative iterator not exhausted")
})
}
require.False(t, it.Next(), "positive iterator not exhausted")
it = h.floatBucketIterator(false, 0, -1)
for i, b := range expNegativeBuckets {
require.True(t, it.Next(), "negative iterator exhausted too early")
require.Equal(t, b, it.At(), "bucket %d", i)
}
require.False(t, it.Next(), "negative iterator not exhausted")
}
func TestFloatCustomBucketsIterators(t *testing.T) {

View File

@ -515,6 +515,11 @@ func (r *regularBucketIterator) Next() bool {
r.currIdx += span.Offset
}
// This protects against index out of range panic, which
// can only happen with an invalid histogram.
if r.bucketsIdx >= len(r.buckets) {
return false
}
r.currCount += r.buckets[r.bucketsIdx]
r.idxInSpan++
r.bucketsIdx++
@ -576,6 +581,11 @@ func (c *cumulativeBucketIterator) Next() bool {
c.initialized = true
}
// This protects against index out of range panic, which
// can only happen with an invalid histogram.
if c.posBucketsIdx >= len(c.h.PositiveBuckets) {
return false
}
c.currCount += c.h.PositiveBuckets[c.posBucketsIdx]
c.currCumulativeCount += uint64(c.currCount)
c.currUpper = getBound(c.currIdx, c.h.Schema, c.h.CustomValues)

View File

@ -243,6 +243,46 @@ func TestCumulativeBucketIterator(t *testing.T) {
{Lower: math.Inf(-1), Upper: math.Inf(1), Count: 5, LowerInclusive: true, UpperInclusive: true, Index: 4},
},
},
{
histogram: Histogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 2},
},
// One spurious bucket, which we expect to be ignored.
PositiveBuckets: []int64{1, 1, -1, 0, 2},
},
expectedBuckets: []Bucket[uint64]{
{Lower: math.Inf(-1), Upper: 1, Count: 1, LowerInclusive: true, UpperInclusive: true, Index: 0},
{Lower: math.Inf(-1), Upper: 2, Count: 3, LowerInclusive: true, UpperInclusive: true, Index: 1},
{Lower: math.Inf(-1), Upper: 4, Count: 3, LowerInclusive: true, UpperInclusive: true, Index: 2},
{Lower: math.Inf(-1), Upper: 8, Count: 4, LowerInclusive: true, UpperInclusive: true, Index: 3},
{Lower: math.Inf(-1), Upper: 16, Count: 5, LowerInclusive: true, UpperInclusive: true, Index: 4},
},
},
{
histogram: Histogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 2},
{Offset: 1, Length: 3},
},
// One bucket is missing. We expect the iteration to end with the last bucket.
PositiveBuckets: []int64{1, 1, -1, 0},
},
expectedBuckets: []Bucket[uint64]{
{Lower: math.Inf(-1), Upper: 1, Count: 1, LowerInclusive: true, UpperInclusive: true, Index: 0},
{Lower: math.Inf(-1), Upper: 2, Count: 3, LowerInclusive: true, UpperInclusive: true, Index: 1},
{Lower: math.Inf(-1), Upper: 4, Count: 3, LowerInclusive: true, UpperInclusive: true, Index: 2},
{Lower: math.Inf(-1), Upper: 8, Count: 4, LowerInclusive: true, UpperInclusive: true, Index: 3},
{Lower: math.Inf(-1), Upper: 16, Count: 5, LowerInclusive: true, UpperInclusive: true, Index: 4},
},
},
}
for i, c := range cases {
@ -459,6 +499,46 @@ func TestRegularBucketIterator(t *testing.T) {
},
expectedNegativeBuckets: []Bucket[uint64]{},
},
{
histogram: Histogram{
Schema: 0,
NegativeSpans: []Span{
{Offset: 0, Length: 5},
{Offset: 1, Length: 1},
},
// One spurious bucket, which we expect to be ignored.
NegativeBuckets: []int64{1, 2, -2, 1, -1, 0, 3},
},
expectedPositiveBuckets: []Bucket[uint64]{},
expectedNegativeBuckets: []Bucket[uint64]{
{Lower: -1, Upper: -0.5, Count: 1, LowerInclusive: true, UpperInclusive: false, Index: 0},
{Lower: -2, Upper: -1, Count: 3, LowerInclusive: true, UpperInclusive: false, Index: 1},
{Lower: -4, Upper: -2, Count: 1, LowerInclusive: true, UpperInclusive: false, Index: 2},
{Lower: -8, Upper: -4, Count: 2, LowerInclusive: true, UpperInclusive: false, Index: 3},
{Lower: -16, Upper: -8, Count: 1, LowerInclusive: true, UpperInclusive: false, Index: 4},
{Lower: -64, Upper: -32, Count: 1, LowerInclusive: true, UpperInclusive: false, Index: 6},
},
},
{
histogram: Histogram{
Schema: 0,
NegativeSpans: []Span{
{Offset: 0, Length: 5},
{Offset: 1, Length: 1},
},
// One bucket is missing. We expect the iteration to end with the last bucket.
NegativeBuckets: []int64{1, 2, -2, 1, -1},
},
expectedPositiveBuckets: []Bucket[uint64]{},
expectedNegativeBuckets: []Bucket[uint64]{
{Lower: -1, Upper: -0.5, Count: 1, LowerInclusive: true, UpperInclusive: false, Index: 0},
{Lower: -2, Upper: -1, Count: 3, LowerInclusive: true, UpperInclusive: false, Index: 1},
{Lower: -4, Upper: -2, Count: 1, LowerInclusive: true, UpperInclusive: false, Index: 2},
{Lower: -8, Upper: -4, Count: 2, LowerInclusive: true, UpperInclusive: false, Index: 3},
{Lower: -16, Upper: -8, Count: 1, LowerInclusive: true, UpperInclusive: false, Index: 4},
},
},
}
for i, c := range cases {