[PERF] PromQL: eliminate string-keyed maps in binary vector matching (#17131)

In this PR, we are eliminating expensive string-keyed (by signature) maps that are accessed for every sample processed. During preprocessing in rangeEval, we assign a unique number from 0 to n-1 to each of the n string signature values, and later only use this number as a label set signature.

Signed-off-by: Linas Medžiūnas <linasm@users.noreply.github.com>
Co-authored-by: George Krajcsovits <krajorama@users.noreply.github.com>
This commit is contained in:
Linas Medžiūnas 2025-11-12 12:43:05 +02:00 committed by GitHub
parent 33082be0e2
commit f330ccaf2f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 86 additions and 72 deletions

View File

@ -1144,8 +1144,13 @@ func (ev *evaluator) Eval(ctx context.Context, expr parser.Expr) (v parser.Value
// EvalSeriesHelper stores extra information about a series.
type EvalSeriesHelper struct {
// Used to map left-hand to right-hand in binary operations.
signature string
// Ordinal number of join signature, used to map left-hand to right-hand in
// binary operations. For example given the following series, if
// the join signature is job, instance then:
// metric{job="a", instance="1", other="x"} -> sigOrdinal 0
// metric{job="a", instance="1", other="y"} -> sigOrdinal 0
// metric{job="a", instance="2", other="x"} -> sigOrdinal 1
sigOrdinal int
}
// EvalNodeHelper stores extra information and caches for evaluating a single node across steps.
@ -1165,9 +1170,13 @@ type EvalNodeHelper struct {
lblResultBuf []byte
// For binary vector matching.
rightSigs map[string]Sample
matchedSigs map[string]map[uint64]struct{}
rightSigs map[int]Sample
matchedSigs map[int]map[uint64]struct{}
resultMetric map[string]labels.Labels
numSigs int
// For info series matching.
rightStrSigs map[string]Sample
// Additional options for the evaluation.
enableDelayedNameRemoval bool
@ -1252,11 +1261,12 @@ func (enh *EvalNodeHelper) resetHistograms(inVec Vector, arg parser.Expr) annota
// function call results.
// The prepSeries function (if provided) can be used to prepare the helper
// for each series, then passed to each call funcCall.
func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Labels, *EvalSeriesHelper), funcCall func([]Vector, Matrix, [][]EvalSeriesHelper, *EvalNodeHelper) (Vector, annotations.Annotations), exprs ...parser.Expr) (Matrix, annotations.Annotations) {
func (ev *evaluator) rangeEval(ctx context.Context, matching *parser.VectorMatching, funcCall func([]Vector, Matrix, [][]EvalSeriesHelper, *EvalNodeHelper) (Vector, annotations.Annotations), exprs ...parser.Expr) (Matrix, annotations.Annotations) {
numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1
matrixes := make([]Matrix, len(exprs))
origMatrixes := make([]Matrix, len(exprs))
originalNumSamples := ev.currentSamples
useSignatures := matching != nil
var warnings annotations.Annotations
for i, e := range exprs {
@ -1297,18 +1307,46 @@ func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Label
bufHelpers [][]EvalSeriesHelper // Buffer updated on each step
)
// If the series preparation function is provided, we should run it for
// every single series in the matrix.
if prepSeries != nil {
if useSignatures {
var (
// Function to compute the join signature for each series.
sigf func(labels.Labels) string
buf = make([]byte, 0, 1024)
names = slices.Clone(matching.MatchingLabels)
)
if matching.On {
slices.Sort(names)
sigf = func(lset labels.Labels) string {
return string(lset.BytesWithLabels(buf, names...))
}
} else { // "without"
names = append([]string{labels.MetricName}, names...)
slices.Sort(names)
sigf = func(lset labels.Labels) string {
return string(lset.BytesWithoutLabels(buf, names...))
}
}
seriesHelpers = make([][]EvalSeriesHelper, len(exprs))
bufHelpers = make([][]EvalSeriesHelper, len(exprs))
signatureToOrdinal := make(map[string]int)
for i := range exprs {
seriesHelpers[i] = make([]EvalSeriesHelper, len(matrixes[i]))
bufHelpers[i] = make([]EvalSeriesHelper, len(matrixes[i]))
for si, series := range matrixes[i] {
prepSeries(series.Metric, &seriesHelpers[i][si])
strSig := sigf(series.Metric)
if sigOrd, ok := signatureToOrdinal[strSig]; ok {
seriesHelpers[i][si] = EvalSeriesHelper{sigOrdinal: sigOrd}
continue
}
signatureToOrdinal[strSig] = enh.numSigs
seriesHelpers[i][si] = EvalSeriesHelper{sigOrdinal: enh.numSigs}
enh.numSigs++
}
}
}
@ -1323,12 +1361,12 @@ func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Label
for i := range exprs {
var bh []EvalSeriesHelper
var sh []EvalSeriesHelper
if prepSeries != nil {
if useSignatures {
bh = bufHelpers[i][:0]
sh = seriesHelpers[i]
}
vectors[i], bh = ev.gatherVector(ts, matrixes[i], vectors[i], bh, sh)
if prepSeries != nil {
if useSignatures {
bufHelpers[i] = bh
}
}
@ -2129,27 +2167,21 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value,
return append(enh.Out, Sample{F: val}), nil
}, e.LHS, e.RHS)
case lt == parser.ValueTypeVector && rt == parser.ValueTypeVector:
// Function to compute the join signature for each series.
buf := make([]byte, 0, 1024)
sigf := signatureFunc(e.VectorMatching.On, buf, e.VectorMatching.MatchingLabels...)
initSignatures := func(series labels.Labels, h *EvalSeriesHelper) {
h.signature = sigf(series)
}
switch e.Op {
case parser.LAND:
return ev.rangeEval(ctx, initSignatures, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
return ev.rangeEval(ctx, e.VectorMatching, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
return ev.VectorAnd(v[0], v[1], e.VectorMatching, sh[0], sh[1], enh), nil
}, e.LHS, e.RHS)
case parser.LOR:
return ev.rangeEval(ctx, initSignatures, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
return ev.rangeEval(ctx, e.VectorMatching, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
return ev.VectorOr(v[0], v[1], e.VectorMatching, sh[0], sh[1], enh), nil
}, e.LHS, e.RHS)
case parser.LUNLESS:
return ev.rangeEval(ctx, initSignatures, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
return ev.rangeEval(ctx, e.VectorMatching, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
return ev.VectorUnless(v[0], v[1], e.VectorMatching, sh[0], sh[1], enh), nil
}, e.LHS, e.RHS)
default:
return ev.rangeEval(ctx, initSignatures, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
return ev.rangeEval(ctx, e.VectorMatching, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
vec, err := ev.VectorBinop(e.Op, v[0], v[1], e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh, e.PositionRange())
return vec, handleVectorBinopError(err, e)
}, e.LHS, e.RHS)
@ -2720,16 +2752,15 @@ func (*evaluator) VectorAnd(lhs, rhs Vector, matching *parser.VectorMatching, lh
return nil // Short-circuit: AND with nothing is nothing.
}
// The set of signatures for the right-hand side Vector.
rightSigs := map[string]struct{}{}
// Add all rhs samples to a map so we can easily find matches later.
// Ordinals of signatures present on the right-hand side.
rightSigOrdinalsPresent := make([]bool, enh.numSigs)
for _, sh := range rhsh {
rightSigs[sh.signature] = struct{}{}
rightSigOrdinalsPresent[sh.sigOrdinal] = true
}
for i, ls := range lhs {
// If there's a matching entry in the right-hand side Vector, add the sample.
if _, ok := rightSigs[lhsh[i].signature]; ok {
if rightSigOrdinalsPresent[lhsh[i].sigOrdinal] {
enh.Out = append(enh.Out, ls)
}
}
@ -2748,15 +2779,15 @@ func (*evaluator) VectorOr(lhs, rhs Vector, matching *parser.VectorMatching, lhs
return enh.Out
}
leftSigs := map[string]struct{}{}
leftSigOrdinalsPresent := make([]bool, enh.numSigs)
// Add everything from the left-hand-side Vector.
for i, ls := range lhs {
leftSigs[lhsh[i].signature] = struct{}{}
leftSigOrdinalsPresent[lhsh[i].sigOrdinal] = true
enh.Out = append(enh.Out, ls)
}
// Add all right-hand side elements which have not been added from the left-hand side.
for j, rs := range rhs {
if _, ok := leftSigs[rhsh[j].signature]; !ok {
if !leftSigOrdinalsPresent[rhsh[j].sigOrdinal] {
enh.Out = append(enh.Out, rs)
}
}
@ -2774,13 +2805,14 @@ func (*evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatching,
return enh.Out
}
rightSigs := map[string]struct{}{}
// Ordinals of signatures present on the right-hand side.
rightSigOrdinalsPresent := make([]bool, enh.numSigs)
for _, sh := range rhsh {
rightSigs[sh.signature] = struct{}{}
rightSigOrdinalsPresent[sh.sigOrdinal] = true
}
for i, ls := range lhs {
if _, ok := rightSigs[lhsh[i].signature]; !ok {
if !rightSigOrdinalsPresent[lhsh[i].sigOrdinal] {
enh.Out = append(enh.Out, ls)
}
}
@ -2804,22 +2836,20 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
lhsh, rhsh = rhsh, lhsh
}
// All samples from the rhs hashed by the matching label/values.
// All samples from the rhs by their join signature ordinal.
if enh.rightSigs == nil {
enh.rightSigs = make(map[string]Sample, len(enh.Out))
enh.rightSigs = make(map[int]Sample, len(enh.Out))
} else {
for k := range enh.rightSigs {
delete(enh.rightSigs, k)
}
clear(enh.rightSigs)
}
rightSigs := enh.rightSigs
// Add all rhs samples to a map so we can easily find matches later.
for i, rs := range rhs {
sig := rhsh[i].signature
sigOrd := rhsh[i].sigOrdinal
// The rhs is guaranteed to be the 'one' side. Having multiple samples
// with the same signature means that the matching is many-to-many.
if duplSample, found := rightSigs[sig]; found {
if duplSample, found := rightSigs[sigOrd]; found {
// oneSide represents which side of the vector represents the 'one' in the many-to-one relationship.
oneSide := "right"
if matching.Card == parser.CardOneToMany {
@ -2830,17 +2860,15 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
ev.errorf("found duplicate series for the match group %s on the %s hand-side of the operation: [%s, %s]"+
";many-to-many matching not allowed: matching labels must be unique on one side", matchedLabels.String(), oneSide, rs.Metric.String(), duplSample.Metric.String())
}
rightSigs[sig] = rs
rightSigs[sigOrd] = rs
}
// Tracks the match-signature. For one-to-one operations the value is nil. For many-to-one
// the value is a set of signatures to detect duplicated result elements.
// Tracks the matching by signature ordinals. For one-to-one operations the value is nil.
// For many-to-one the value is a set of hashes to detect duplicated result elements.
if enh.matchedSigs == nil {
enh.matchedSigs = make(map[string]map[uint64]struct{}, len(rightSigs))
enh.matchedSigs = make(map[int]map[uint64]struct{}, len(rightSigs))
} else {
for k := range enh.matchedSigs {
delete(enh.matchedSigs, k)
}
clear(enh.matchedSigs)
}
matchedSigs := enh.matchedSigs
@ -2848,9 +2876,9 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
// the binary operation.
var lastErr error
for i, ls := range lhs {
sig := lhsh[i].signature
sigOrd := lhsh[i].sigOrdinal
rs, found := rightSigs[sig] // Look for a match in the rhs Vector.
rs, found := rightSigs[sigOrd] // Look for a match in the rhs Vector.
if !found {
continue
}
@ -2885,12 +2913,12 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
if !ev.enableDelayedNameRemoval && returnBool {
metric = metric.DropReserved(schema.IsMetadataLabel)
}
insertedSigs, exists := matchedSigs[sig]
insertedSigs, exists := matchedSigs[sigOrd]
if matching.Card == parser.CardOneToOne {
if exists {
ev.errorf("multiple matches for labels: many-to-one matching must be explicit (group_left/group_right)")
}
matchedSigs[sig] = nil // Set existence to true.
matchedSigs[sigOrd] = nil // Set existence to true.
} else {
// In many-to-one matching the grouping labels have to ensure a unique metric
// for the result Vector. Check whether those labels have already been added for
@ -2899,7 +2927,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
if !exists {
insertedSigs = map[uint64]struct{}{}
matchedSigs[sig] = insertedSigs
matchedSigs[sigOrd] = insertedSigs
} else if _, duplicate := insertedSigs[insertSig]; duplicate {
ev.errorf("multiple matches for labels: grouping labels must ensure unique matches")
}
@ -2916,20 +2944,6 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
return enh.Out, lastErr
}
func signatureFunc(on bool, b []byte, names ...string) func(labels.Labels) string {
if on {
slices.Sort(names)
return func(lset labels.Labels) string {
return string(lset.BytesWithLabels(b, names...))
}
}
names = append([]string{labels.MetricName}, names...)
slices.Sort(names)
return func(lset labels.Labels) string {
return string(lset.BytesWithoutLabels(b, names...))
}
}
// resultMetric returns the metric for the given sample(s) based on the Vector
// binary operation and the matching options.
func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.VectorMatching, enh *EvalNodeHelper) labels.Labels {

View File

@ -337,10 +337,10 @@ func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[u
}
// All samples from the info Vector hashed by the matching label/values.
if enh.rightSigs == nil {
enh.rightSigs = make(map[string]Sample, len(enh.Out))
if enh.rightStrSigs == nil {
enh.rightStrSigs = make(map[string]Sample, len(enh.Out))
} else {
clear(enh.rightSigs)
clear(enh.rightStrSigs)
}
for _, s := range info {
@ -351,7 +351,7 @@ func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[u
origT := int64(s.F)
sig := infoSigs[s.Metric.Hash()]
if existing, exists := enh.rightSigs[sig]; exists {
if existing, exists := enh.rightStrSigs[sig]; exists {
// We encode original info sample timestamps via the float value.
existingOrigT := int64(existing.F)
switch {
@ -359,14 +359,14 @@ func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[u
// Keep the other info sample, since it's newer.
case existingOrigT < origT:
// Keep this info sample, since it's newer.
enh.rightSigs[sig] = s
enh.rightStrSigs[sig] = s
default:
// The two info samples have the same timestamp - conflict.
ev.errorf("found duplicate series for info metric: existing %s @ %d, new %s @ %d",
existing.Metric.String(), existingOrigT, s.Metric.String(), origT)
}
} else {
enh.rightSigs[sig] = s
enh.rightStrSigs[sig] = s
}
}
@ -389,7 +389,7 @@ func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[u
// For every info metric name, try to find an info series with the same signature.
seenInfoMetrics := map[string]struct{}{}
for infoName, sig := range baseSigs[hash] {
is, exists := enh.rightSigs[sig]
is, exists := enh.rightStrSigs[sig]
if !exists {
continue
}