prometheus/tsdb/chunkenc/benchmark_test.go
György Krajcsovits 5e5b14c04b
feat(chunkenc): replace xoroptst chunk encoding with xor2
XOR2 is based on https://github.com/prometheus/prometheus/pull/18238
With additional ST support.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
2026-03-06 14:35:06 +01:00

344 lines
9.4 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunkenc
import (
"errors"
"fmt"
"io"
"math"
"math/rand"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/timestamp"
)
type sampleCase struct {
name string
samples []triple
}
type fmtCase struct {
name string
newChunkFn func() Chunk
stUnsupported bool
}
func foreachFmtSampleCase(b *testing.B, fn func(b *testing.B, f fmtCase, s sampleCase)) {
const nSamples = 120 // Same as tsdb.DefaultSamplesPerChunk.
d, err := time.Parse(time.DateTime, "2025-11-04 10:01:05")
require.NoError(b, err)
var (
r = rand.New(rand.NewSource(1)) // Fixed seed for reproducibility.
initST = timestamp.FromTime(d) // Use realistic timestamp.
initT = initST + 15000 // 15s after initST.
initV = 1243535.123
rInts = make([]int64, 2*nSamples) // Random ints for timestamps and STs.
rFloats = make([]float64, nSamples)
)
// Pre-generate random numbers so that adding/removing cases does not change
// the generated samples.
for i := range nSamples {
rInts[i] = int64(r.Intn(100))
rInts[nSamples+i] = int64(r.Intn(100))
rFloats[i] = float64(r.Intn(100))
}
// tPatterns control how the regular timestamp advances.
type tPattern struct {
name string
next func(t int64, i int) int64
}
// vPatterns control how the value advances.
type vPattern struct {
name string
next func(v float64, i int) float64
}
// stPatterns compute the start timestamp from the previous t (before the
// step), the new t (after the step), and the sample index.
type stPattern struct {
name string
compute func(prevT, newT int64, i int) int64
}
tPatterns := []tPattern{
{
name: "t=constant",
next: func(t int64, _ int) int64 { return t + 15000 },
},
{
// 15 seconds ± up to 100ms of jitter.
name: "t=jitter",
next: func(t int64, i int) int64 { return t + rInts[i] - 50 + 15000 },
},
{
// First 10 samples at constant 60s, then one 10-interval gap (600s),
// then 60s ± 30ms jitter. The gap triggers XOR18111 full mode via
// multiplier encoding (dod=540000 = 9×60000). Subsequent small-jitter
// delta-of-deltas (≤30ms) use XOR18111's 7-bit full-mode code (9 bits
// total) vs XOR compact's minimum 14-bit code (16 bits total).
name: "t=gap-jitter",
next: func(t int64, i int) int64 {
if i < 10 {
return t + 60000
}
if i == 10 {
return t + 10*60000 // 10-interval gap; triggers XOR18111 full mode.
}
return t + 60000 + rInts[i]%61 - 30 // 60s ± 30ms jitter.
},
},
}
vPatterns := []vPattern{
{
name: "v=constant",
next: func(v float64, _ int) float64 { return v },
},
// We are not interested in float compression we're not changing it.
// {
// // Varying from -50 to +50 in 100 discrete steps.
// name: "v=rand-steps",
// next: func(v float64, i int) float64 { return v + rFloats[i] - 50 },
// },
// {
// // Random increment between 0 and 1.0.
// name: "v=rand0-1",
// next: func(v float64, i int) float64 { return v + rFloats[i]/100.0 },
// },
// {
// // Random decrement between 0 and -1.0. Tests negative varint encoding;
// // see https://victoriametrics.com/blog/go-protobuf/.
// name: "v=nrand0-1",
// next: func(v float64, i int) float64 { return v - rFloats[i]/100.0 },
// },
}
stPatterns := []stPattern{
{
name: "st=0",
compute: func(_, _ int64, _ int) int64 { return 0 },
},
{
// Constant ST throughout the chunk, typical for long-running counters.
name: "st=cumulative",
compute: func(_, _ int64, _ int) int64 { return initST },
},
{
// ST is just after the previous sample's t: tight delta interval.
name: "st=delta-excl",
compute: func(prevT, _ int64, _ int) int64 { return prevT + 1 },
},
{
// ST equals the previous sample's t: inclusive delta interval.
name: "st=delta-incl",
compute: func(prevT, _ int64, _ int) int64 { return prevT },
},
{
// ST equals the current sample's t.
name: "st=t",
compute: func(_, newT int64, _ int) int64 { return newT },
},
{
// ST is equal to the previous t plus up to 100ms of jitter.
name: "st=delta-jitter",
compute: func(prevT, _ int64, i int) int64 { return prevT + rInts[nSamples+i] },
},
{
// Cumulative ST with periodic resets 10s before the current t.
name: "st=cum-resets",
compute: func(_, newT int64, i int) int64 {
if i%6 == 5 {
return newT - 10000
}
return initST
},
},
{
// Cumulative ST with periodic zero resets.
name: "st=cum-zeros",
compute: func(_, _ int64, i int) int64 {
if i%6 == 5 {
return 0
}
return initST
},
},
}
var sampleCases []sampleCase
for _, tp := range tPatterns {
for _, vp := range vPatterns {
for _, sp := range stPatterns {
samples := make([]triple, 0, nSamples)
t, v := initT, initV
for i := range nSamples {
prevT := t
t = tp.next(t, i)
v = vp.next(v, i)
st := sp.compute(prevT, t, i)
samples = append(samples, triple{st: st, t: t, v: v})
}
sampleCases = append(sampleCases, sampleCase{
name: tp.name + "/" + vp.name + "/" + sp.name,
samples: samples,
})
}
}
}
for _, f := range []fmtCase{
{name: "XOR", newChunkFn: func() Chunk { return NewXORChunk() }, stUnsupported: true},
{name: "XOR2", newChunkFn: func() Chunk { return NewXOR2Chunk() }},
} {
for _, s := range sampleCases {
b.Run(fmt.Sprintf("fmt=%s/%s", f.name, s.name), func(b *testing.B) {
fn(b, f, s)
})
}
}
}
/*
export bench=bw.bench/append.v2 && go test \
-run '^$' -bench '^BenchmarkAppender' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
For profiles:
export bench=bw.bench/appendprof && go test \
-run '^$' -bench '^BenchmarkAppender' \
-benchtime 1s -count 1 -cpu 2 -timeout 999m \
-cpuprofile=${bench}.cpu.pprof \
| tee ${bench}.txt
*/
func BenchmarkAppender(b *testing.B) {
foreachFmtSampleCase(b, func(b *testing.B, f fmtCase, s sampleCase) {
b.ReportAllocs()
for b.Loop() {
c := f.newChunkFn()
a, err := c.Appender()
if err != nil {
b.Fatalf("get appender: %s", err)
}
for _, p := range s.samples {
a.Append(p.st, p.t, p.v)
}
// NOTE: Some buffered implementations only encode on Bytes().
b.ReportMetric(float64(len(c.Bytes())), "B/chunk")
require.Equal(b, len(s.samples), c.NumSamples())
}
})
}
/*
export bench=bw.bench/iter && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1s -count 6 -cpu 2 -timeout 999m \
| tee ${bench}.txt
For profiles:
export bench=bw.bench/iterprof && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1000000x -count 1 -cpu 2 -timeout 999m \
-cpuprofile=${bench}.cpu.pprof \
| tee ${bench}.txt
export bench=bw.bench/iterprof && go test \
-run '^$' -bench '^BenchmarkIterator' \
-benchtime 1000000x -count 1 -cpu 2 -timeout 999m \
-memprofile=${bench}.mem.pprof \
| tee ${bench}.txt
*/
func BenchmarkIterator(b *testing.B) {
foreachFmtSampleCase(b, func(b *testing.B, f fmtCase, s sampleCase) {
floatEquals := func(a, b float64) bool {
return a == b
}
if f.name == "ALPBuffered" {
// Hack as ALP loses precision.
floatEquals = func(a, b float64) bool {
return math.Abs(a-b) < 1e-6
}
}
b.ReportAllocs()
c := f.newChunkFn()
a, err := c.Appender()
if err != nil {
b.Fatalf("get appender: %s", err)
}
for _, p := range s.samples {
a.Append(p.st, p.t, p.v)
}
// Some chunk implementations might be buffered. Reset to ensure we don't reuse
// appending buffers.
c.Reset(c.Bytes())
// While we are at it, test if encoding/decoding works.
it := c.Iterator(nil)
require.Equal(b, len(s.samples), c.NumSamples())
var got []triple
for i := 0; it.Next() == ValFloat; i++ {
t, v := it.At()
got = append(got, triple{st: it.AtST(), t: t, v: v})
}
if err := it.Err(); err != nil && !errors.Is(err, io.EOF) {
require.NoError(b, err)
}
expectedSamples := s.samples
if f.stUnsupported {
// If the format does not support ST, zero them out for comparison.
expectedSamples = make([]triple, len(s.samples))
copy(expectedSamples, s.samples)
for i := range s.samples {
expectedSamples[i].st = 0
}
}
if diff := cmp.Diff(expectedSamples, got, cmp.AllowUnexported(triple{}), cmp.Comparer(floatEquals)); diff != "" {
b.Fatalf("mismatch (-want +got):\n%s", diff)
}
var sink float64
// Measure decoding efficiency.
for i := 0; b.Loop(); {
// Some chunk implementations might be buffered. Reset to ensure we don't reuse
// previous decoded data.
c.Reset(c.Bytes())
b.ReportMetric(float64(len(c.Bytes())), "B/chunk")
it := c.Iterator(it)
for it.Next() == ValFloat {
_, v := it.At()
sink = v
i++
}
if err := it.Err(); err != nil && !errors.Is(err, io.EOF) {
require.NoError(b, err)
}
_ = sink
}
})
}