Improving "Sparse postings" intersection (#13971)

Lets take the given example:

P1: [2, 5, 9, 18, 21]
P2: [3, 7, 14, 19, 21]
P3: [1, 21]

Currently, we would only advance through P1 and P2 until discovering
an intersection and then checking P3. In essence, the traversal order
was: 2, 3, 5, 7, 9, 14, 18, 19, 21 (intersection found).

With the proposed change, P3 is also examined even if P1 and P2
haven't found an intersection yet. This adjustment allows for the
possibility of skipping some iterations.

Post-change, the traversal order becomes: 2, 3, 21 (3 iterations instead of 9).

Signed-off-by: alanprot <alanprot@gmail.com>
This commit is contained in:
Alan Protasio 2025-08-05 04:22:54 -07:00 committed by GitHub
parent 457a2381f9
commit 25aee26a57
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 4 deletions

View File

@ -620,18 +620,22 @@ func (it *intersectPostings) At() storage.SeriesRef {
}
func (it *intersectPostings) doNext() bool {
Loop:
for {
allEqual := true
for _, p := range it.arr {
if !p.Seek(it.cur) {
return false
}
if p.At() > it.cur {
it.cur = p.At()
continue Loop
allEqual = false
}
}
return true
// if all p.At() are all equal, we found an intersection.
if allEqual {
return true
}
}
}

View File

@ -47,6 +47,7 @@ func BenchmarkQuerier(b *testing.B) {
}
for n := 0; n < 10; n++ {
addSeries(labels.FromStrings("a", strconv.Itoa(n)+postingsBenchSuffix))
for i := 0; i < 100000; i++ {
addSeries(labels.FromStrings("i", strconv.Itoa(i)+postingsBenchSuffix, "n", strconv.Itoa(n)+postingsBenchSuffix, "j", "foo"))
// Have some series that won't be matched, to properly test inverted matches.
@ -101,7 +102,9 @@ func BenchmarkQuerier(b *testing.B) {
func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
ctx := context.Background()
a1 := labels.MustNewMatcher(labels.MatchEqual, "a", "1"+postingsBenchSuffix)
n1 := labels.MustNewMatcher(labels.MatchEqual, "n", "1"+postingsBenchSuffix)
n0_1 := labels.MustNewMatcher(labels.MatchEqual, "n", "0_1"+postingsBenchSuffix)
nX := labels.MustNewMatcher(labels.MatchEqual, "n", "X"+postingsBenchSuffix)
jFoo := labels.MustNewMatcher(labels.MatchEqual, "j", "foo")
@ -137,6 +140,7 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
{`j="foo",n="1"`, []*labels.Matcher{jFoo, n1}},
{`n="1",j!="foo"`, []*labels.Matcher{n1, jNotFoo}},
{`n="1",i!="2"`, []*labels.Matcher{n1, iNot2}},
{`n="0_1",j="foo,a="1"`, []*labels.Matcher{n0_1, jFoo, a1}},
{`n="X",j!="foo"`, []*labels.Matcher{nX, jNotFoo}},
{`i=~"1[0-9]",j=~"foo|bar"`, []*labels.Matcher{iCharSet, jFooBar}},
{`j=~"foo|bar"`, []*labels.Matcher{jFooBar}},
@ -176,8 +180,12 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := PostingsForMatchers(ctx, ir, c.matchers...)
p, err := PostingsForMatchers(ctx, ir, c.matchers...)
require.NoError(b, err)
// Iterate over the postings
for p.Next() {
// Do nothing
}
}
})
}