From b866db009ba2271edf532d50b38afef27c8469ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Rabenstein?= Date: Mon, 29 Nov 2021 10:47:56 +0100 Subject: [PATCH] storage: Fix and improve the Seek method of various iterators (#9878) There was a subtle and nasty bug in listSeriesIterator.Seek. In addition, the Seek call is defined to be a no-op if the current position of the iterator is already pointing to a suitable sample. This commit adds fast paths for this case to several potentially expensive Seek calls. Another bug was in concreteSeriesIterator.Seek. It always searched the whole series and not from the current position of the iterator. Signed-off-by: beorn7 --- storage/merge.go | 5 ++++ storage/remote/codec.go | 12 ++++++-- storage/remote/codec_test.go | 44 +++++++++++++++++++++++++++++ storage/series.go | 6 +++- storage/series_test.go | 54 ++++++++++++++++++++++++++++++++++++ tsdb/tsdbutil/buffer_test.go | 6 +++- 6 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 storage/series_test.go diff --git a/storage/merge.go b/storage/merge.go index 01e667751d..f7246c7c8e 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -457,6 +457,11 @@ func NewChainSampleIterator(iterators []chunkenc.Iterator) chunkenc.Iterator { } func (c *chainSampleIterator) Seek(t int64) bool { + // No-op check + if c.curr != nil && c.lastt >= t { + return true + } + c.h = samplesIteratorHeap{} for _, iter := range c.iterators { if iter.Seek(t) { diff --git a/storage/remote/codec.go b/storage/remote/codec.go index a9f6af0f38..fa033b5895 100644 --- a/storage/remote/codec.go +++ b/storage/remote/codec.go @@ -357,8 +357,16 @@ func newConcreteSeriersIterator(series *concreteSeries) chunkenc.Iterator { // Seek implements storage.SeriesIterator. func (c *concreteSeriesIterator) Seek(t int64) bool { - c.cur = sort.Search(len(c.series.samples), func(n int) bool { - return c.series.samples[n].Timestamp >= t + if c.cur == -1 { + c.cur = 0 + } + // No-op check. 
+ if s := c.series.samples[c.cur]; s.Timestamp >= t { + return true + } + // Do binary search between current position and end. + c.cur += sort.Search(len(c.series.samples)-c.cur, func(n int) bool { + return c.series.samples[n+c.cur].Timestamp >= t }) return c.cur < len(c.series.samples) } diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index b5949cd318..1432736e13 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -191,6 +191,50 @@ func TestConcreteSeriesClonesLabels(t *testing.T) { require.Equal(t, lbls, gotLabels) } +func TestConcreteSeriesIterator(t *testing.T) { + series := &concreteSeries{ + labels: labels.FromStrings("foo", "bar"), + samples: []prompb.Sample{ + {Value: 1, Timestamp: 1}, + {Value: 1.5, Timestamp: 1}, + {Value: 2, Timestamp: 2}, + {Value: 3, Timestamp: 3}, + {Value: 4, Timestamp: 4}, + }, + } + it := series.Iterator() + + // Seek to the first sample with ts=1. + require.True(t, it.Seek(1)) + ts, v := it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1., v) + + // Advance one sample with Next; the next sample still has ts=1. + require.True(t, it.Next()) + ts, v = it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1.5, v) + + // Seek again to 1 and make sure we stay where we are. + require.True(t, it.Seek(1)) + ts, v = it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1.5, v) + + // Another seek. + require.True(t, it.Seek(3)) + ts, v = it.At() + require.Equal(t, int64(3), ts) + require.Equal(t, 3., v) + + // And we don't go back. 
+ require.True(t, it.Seek(2)) + ts, v = it.At() + require.Equal(t, int64(3), ts) + require.Equal(t, 3., v) +} + func TestFromQueryResultWithDuplicates(t *testing.T) { ts1 := prompb.TimeSeries{ Labels: []prompb.Label{ diff --git a/storage/series.go b/storage/series.go index 3a5790c2a8..024b0fc72f 100644 --- a/storage/series.go +++ b/storage/series.go @@ -99,8 +99,12 @@ func (it *listSeriesIterator) Seek(t int64) bool { if it.idx == -1 { it.idx = 0 } + // No-op check. + if s := it.samples.Get(it.idx); s.T() >= t { + return true + } // Do binary search between current position and end. - it.idx = sort.Search(it.samples.Len()-it.idx, func(i int) bool { + it.idx += sort.Search(it.samples.Len()-it.idx, func(i int) bool { s := it.samples.Get(i + it.idx) return s.T() >= t }) diff --git a/storage/series_test.go b/storage/series_test.go new file mode 100644 index 0000000000..384009de43 --- /dev/null +++ b/storage/series_test.go @@ -0,0 +1,54 @@ +// Copyright 2021 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestListSeriesIterator(t *testing.T) { + it := NewListSeriesIterator(samples{sample{0, 0}, sample{1, 1}, sample{1, 1.5}, sample{2, 2}, sample{3, 3}}) + + // Seek to the first sample with ts=1. 
+ require.True(t, it.Seek(1)) + ts, v := it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1., v) + + // Advance one sample with Next; the next sample still has ts=1. + require.True(t, it.Next()) + ts, v = it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1.5, v) + + // Seek again to 1 and make sure we stay where we are. + require.True(t, it.Seek(1)) + ts, v = it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, 1.5, v) + + // Another seek. + require.True(t, it.Seek(3)) + ts, v = it.At() + require.Equal(t, int64(3), ts) + require.Equal(t, 3., v) + + // And we don't go back. + require.True(t, it.Seek(2)) + ts, v = it.At() + require.Equal(t, int64(3), ts) + require.Equal(t, 3., v) +} diff --git a/tsdb/tsdbutil/buffer_test.go b/tsdb/tsdbutil/buffer_test.go index a66786b625..baae8266bb 100644 --- a/tsdb/tsdbutil/buffer_test.go +++ b/tsdb/tsdbutil/buffer_test.go @@ -159,8 +159,12 @@ func (it *listSeriesIterator) Seek(t int64) bool { if it.idx == -1 { it.idx = 0 } + // No-op check. + if s := it.list[it.idx]; s.T() >= t { + return true + } // Do binary search between current position and end. - it.idx = sort.Search(len(it.list)-it.idx, func(i int) bool { + it.idx += sort.Search(len(it.list)-it.idx, func(i int) bool { s := it.list[i+it.idx] return s.t >= t })