From f330ccaf2fee672127049bd867a9331b941c5c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Med=C5=BEi=C5=ABnas?= Date: Wed, 12 Nov 2025 12:43:05 +0200 Subject: [PATCH] [PERF] PromQL: eliminate string-keyed maps in binary vector matching (#17131) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this PR, we are eliminating expensive string-keyed (by signature) maps that are accessed for every sample processed. During preprocessing in rangeEval, we assign a unique number from 0 to n-1 to each of the n string signature values, and later only use this number as a label set signature. Signed-off-by: Linas Medžiūnas Co-authored-by: George Krajcsovits --- promql/engine.go | 144 ++++++++++++++++++++++++++--------------------- promql/info.go | 14 ++--- 2 files changed, 86 insertions(+), 72 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 75fc9b05d3..146a93ec58 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -1144,8 +1144,13 @@ func (ev *evaluator) Eval(ctx context.Context, expr parser.Expr) (v parser.Value // EvalSeriesHelper stores extra information about a series. type EvalSeriesHelper struct { - // Used to map left-hand to right-hand in binary operations. - signature string + // Ordinal number of join signature, used to map left-hand to right-hand in + // binary operations. For example given the following series, if + // the join signature is job, instance then: + // metric{job="a", instance="1", other="x"} -> sigOrdinal 0 + // metric{job="a", instance="1", other="y"} -> sigOrdinal 0 + // metric{job="a", instance="2", other="x"} -> sigOrdinal 1 + sigOrdinal int } // EvalNodeHelper stores extra information and caches for evaluating a single node across steps. @@ -1165,9 +1170,13 @@ type EvalNodeHelper struct { lblResultBuf []byte // For binary vector matching. - rightSigs map[string]Sample - matchedSigs map[string]map[uint64]struct{} + rightSigs map[int]Sample + matchedSigs map[int]map[uint64]struct{} resultMetric map[string]labels.Labels + numSigs int + + // For info series matching. + rightStrSigs map[string]Sample // Additional options for the evaluation. enableDelayedNameRemoval bool @@ -1252,11 +1261,12 @@ func (enh *EvalNodeHelper) resetHistograms(inVec Vector, arg parser.Expr) annota // function call results. // The prepSeries function (if provided) can be used to prepare the helper // for each series, then passed to each call funcCall. -func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Labels, *EvalSeriesHelper), funcCall func([]Vector, Matrix, [][]EvalSeriesHelper, *EvalNodeHelper) (Vector, annotations.Annotations), exprs ...parser.Expr) (Matrix, annotations.Annotations) { +func (ev *evaluator) rangeEval(ctx context.Context, matching *parser.VectorMatching, funcCall func([]Vector, Matrix, [][]EvalSeriesHelper, *EvalNodeHelper) (Vector, annotations.Annotations), exprs ...parser.Expr) (Matrix, annotations.Annotations) { numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 matrixes := make([]Matrix, len(exprs)) origMatrixes := make([]Matrix, len(exprs)) originalNumSamples := ev.currentSamples + useSignatures := matching != nil var warnings annotations.Annotations for i, e := range exprs { @@ -1297,18 +1307,46 @@ func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Label bufHelpers [][]EvalSeriesHelper // Buffer updated on each step ) - // If the series preparation function is provided, we should run it for - // every single series in the matrix. - if prepSeries != nil { + if useSignatures { + var ( + // Function to compute the join signature for each series. + sigf func(labels.Labels) string + buf = make([]byte, 0, 1024) + names = slices.Clone(matching.MatchingLabels) + ) + if matching.On { + slices.Sort(names) + sigf = func(lset labels.Labels) string { + return string(lset.BytesWithLabels(buf, names...)) + } + } else { // "without" + names = append([]string{labels.MetricName}, names...) + slices.Sort(names) + sigf = func(lset labels.Labels) string { + return string(lset.BytesWithoutLabels(buf, names...)) + } + } + seriesHelpers = make([][]EvalSeriesHelper, len(exprs)) bufHelpers = make([][]EvalSeriesHelper, len(exprs)) + signatureToOrdinal := make(map[string]int) + for i := range exprs { seriesHelpers[i] = make([]EvalSeriesHelper, len(matrixes[i])) bufHelpers[i] = make([]EvalSeriesHelper, len(matrixes[i])) for si, series := range matrixes[i] { - prepSeries(series.Metric, &seriesHelpers[i][si]) + strSig := sigf(series.Metric) + + if sigOrd, ok := signatureToOrdinal[strSig]; ok { + seriesHelpers[i][si] = EvalSeriesHelper{sigOrdinal: sigOrd} + continue + } + + signatureToOrdinal[strSig] = enh.numSigs + seriesHelpers[i][si] = EvalSeriesHelper{sigOrdinal: enh.numSigs} + enh.numSigs++ } } } @@ -1323,12 +1361,12 @@ func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Label for i := range exprs { var bh []EvalSeriesHelper var sh []EvalSeriesHelper - if prepSeries != nil { + if useSignatures { bh = bufHelpers[i][:0] sh = seriesHelpers[i] } vectors[i], bh = ev.gatherVector(ts, matrixes[i], vectors[i], bh, sh) - if prepSeries != nil { + if useSignatures { bufHelpers[i] = bh } } @@ -2129,27 +2167,21 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, return append(enh.Out, Sample{F: val}), nil }, e.LHS, e.RHS) case lt == parser.ValueTypeVector && rt == parser.ValueTypeVector: - // Function to compute the join signature for each series. - buf := make([]byte, 0, 1024) - sigf := signatureFunc(e.VectorMatching.On, buf, e.VectorMatching.MatchingLabels...) - initSignatures := func(series labels.Labels, h *EvalSeriesHelper) { - h.signature = sigf(series) - } switch e.Op { case parser.LAND: - return ev.rangeEval(ctx, initSignatures, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, e.VectorMatching, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.VectorAnd(v[0], v[1], e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) case parser.LOR: - return ev.rangeEval(ctx, initSignatures, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, e.VectorMatching, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.VectorOr(v[0], v[1], e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) case parser.LUNLESS: - return ev.rangeEval(ctx, initSignatures, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, e.VectorMatching, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.VectorUnless(v[0], v[1], e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) default: - return ev.rangeEval(ctx, initSignatures, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, e.VectorMatching, func(v []Vector, _ Matrix, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec, err := ev.VectorBinop(e.Op, v[0], v[1], e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) @@ -2720,16 +2752,15 @@ func (*evaluator) VectorAnd(lhs, rhs Vector, matching *parser.VectorMatching, lh return nil // Short-circuit: AND with nothing is nothing. } - // The set of signatures for the right-hand side Vector. - rightSigs := map[string]struct{}{} - // Add all rhs samples to a map so we can easily find matches later. + // Ordinals of signatures present on the right-hand side. + rightSigOrdinalsPresent := make([]bool, enh.numSigs) for _, sh := range rhsh { - rightSigs[sh.signature] = struct{}{} + rightSigOrdinalsPresent[sh.sigOrdinal] = true } for i, ls := range lhs { // If there's a matching entry in the right-hand side Vector, add the sample. - if _, ok := rightSigs[lhsh[i].signature]; ok { + if rightSigOrdinalsPresent[lhsh[i].sigOrdinal] { enh.Out = append(enh.Out, ls) } } @@ -2748,15 +2779,15 @@ func (*evaluator) VectorOr(lhs, rhs Vector, matching *parser.VectorMatching, lhs return enh.Out } - leftSigs := map[string]struct{}{} + leftSigOrdinalsPresent := make([]bool, enh.numSigs) // Add everything from the left-hand-side Vector. for i, ls := range lhs { - leftSigs[lhsh[i].signature] = struct{}{} + leftSigOrdinalsPresent[lhsh[i].sigOrdinal] = true enh.Out = append(enh.Out, ls) } // Add all right-hand side elements which have not been added from the left-hand side. for j, rs := range rhs { - if _, ok := leftSigs[rhsh[j].signature]; !ok { + if !leftSigOrdinalsPresent[rhsh[j].sigOrdinal] { enh.Out = append(enh.Out, rs) } } @@ -2774,13 +2805,14 @@ func (*evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatching, return enh.Out } - rightSigs := map[string]struct{}{} + // Ordinals of signatures present on the right-hand side. + rightSigOrdinalsPresent := make([]bool, enh.numSigs) for _, sh := range rhsh { - rightSigs[sh.signature] = struct{}{} + rightSigOrdinalsPresent[sh.sigOrdinal] = true } for i, ls := range lhs { - if _, ok := rightSigs[lhsh[i].signature]; !ok { + if !rightSigOrdinalsPresent[lhsh[i].sigOrdinal] { enh.Out = append(enh.Out, ls) } } @@ -2804,22 +2836,20 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * lhsh, rhsh = rhsh, lhsh } - // All samples from the rhs hashed by the matching label/values. + // All samples from the rhs by their join signature ordinal. if enh.rightSigs == nil { - enh.rightSigs = make(map[string]Sample, len(enh.Out)) + enh.rightSigs = make(map[int]Sample, len(enh.Out)) } else { - for k := range enh.rightSigs { - delete(enh.rightSigs, k) - } + clear(enh.rightSigs) } rightSigs := enh.rightSigs // Add all rhs samples to a map so we can easily find matches later. for i, rs := range rhs { - sig := rhsh[i].signature + sigOrd := rhsh[i].sigOrdinal // The rhs is guaranteed to be the 'one' side. Having multiple samples // with the same signature means that the matching is many-to-many. - if duplSample, found := rightSigs[sig]; found { + if duplSample, found := rightSigs[sigOrd]; found { // oneSide represents which side of the vector represents the 'one' in the many-to-one relationship. oneSide := "right" if matching.Card == parser.CardOneToMany { @@ -2830,17 +2860,15 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * ev.errorf("found duplicate series for the match group %s on the %s hand-side of the operation: [%s, %s]"+ ";many-to-many matching not allowed: matching labels must be unique on one side", matchedLabels.String(), oneSide, rs.Metric.String(), duplSample.Metric.String()) } - rightSigs[sig] = rs + rightSigs[sigOrd] = rs } - // Tracks the match-signature. For one-to-one operations the value is nil. For many-to-one - // the value is a set of signatures to detect duplicated result elements. + // Tracks the matching by signature ordinals. For one-to-one operations the value is nil. + // For many-to-one the value is a set of hashes to detect duplicated result elements. if enh.matchedSigs == nil { - enh.matchedSigs = make(map[string]map[uint64]struct{}, len(rightSigs)) + enh.matchedSigs = make(map[int]map[uint64]struct{}, len(rightSigs)) } else { - for k := range enh.matchedSigs { - delete(enh.matchedSigs, k) - } + clear(enh.matchedSigs) } matchedSigs := enh.matchedSigs @@ -2848,9 +2876,9 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * // the binary operation. var lastErr error for i, ls := range lhs { - sig := lhsh[i].signature + sigOrd := lhsh[i].sigOrdinal - rs, found := rightSigs[sig] // Look for a match in the rhs Vector. + rs, found := rightSigs[sigOrd] // Look for a match in the rhs Vector. if !found { continue } @@ -2885,12 +2913,12 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * if !ev.enableDelayedNameRemoval && returnBool { metric = metric.DropReserved(schema.IsMetadataLabel) } - insertedSigs, exists := matchedSigs[sig] + insertedSigs, exists := matchedSigs[sigOrd] if matching.Card == parser.CardOneToOne { if exists { ev.errorf("multiple matches for labels: many-to-one matching must be explicit (group_left/group_right)") } - matchedSigs[sig] = nil // Set existence to true. + matchedSigs[sigOrd] = nil // Set existence to true. } else { // In many-to-one matching the grouping labels have to ensure a unique metric // for the result Vector. Check whether those labels have already been added for @@ -2899,7 +2927,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * if !exists { insertedSigs = map[uint64]struct{}{} - matchedSigs[sig] = insertedSigs + matchedSigs[sigOrd] = insertedSigs } else if _, duplicate := insertedSigs[insertSig]; duplicate { ev.errorf("multiple matches for labels: grouping labels must ensure unique matches") } @@ -2916,20 +2944,6 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * return enh.Out, lastErr } -func signatureFunc(on bool, b []byte, names ...string) func(labels.Labels) string { - if on { - slices.Sort(names) - return func(lset labels.Labels) string { - return string(lset.BytesWithLabels(b, names...)) - } - } - names = append([]string{labels.MetricName}, names...) - slices.Sort(names) - return func(lset labels.Labels) string { - return string(lset.BytesWithoutLabels(b, names...)) - } -} - // resultMetric returns the metric for the given sample(s) based on the Vector // binary operation and the matching options. func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.VectorMatching, enh *EvalNodeHelper) labels.Labels { diff --git a/promql/info.go b/promql/info.go index 0067fce845..d5ffda6af2 100644 --- a/promql/info.go +++ b/promql/info.go @@ -337,10 +337,10 @@ func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[u } // All samples from the info Vector hashed by the matching label/values. - if enh.rightSigs == nil { - enh.rightSigs = make(map[string]Sample, len(enh.Out)) + if enh.rightStrSigs == nil { + enh.rightStrSigs = make(map[string]Sample, len(enh.Out)) } else { - clear(enh.rightSigs) + clear(enh.rightStrSigs) } for _, s := range info { @@ -351,7 +351,7 @@ func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[u origT := int64(s.F) sig := infoSigs[s.Metric.Hash()] - if existing, exists := enh.rightSigs[sig]; exists { + if existing, exists := enh.rightStrSigs[sig]; exists { // We encode original info sample timestamps via the float value. existingOrigT := int64(existing.F) switch { @@ -359,14 +359,14 @@ func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[u // Keep the other info sample, since it's newer. case existingOrigT < origT: // Keep this info sample, since it's newer. - enh.rightSigs[sig] = s + enh.rightStrSigs[sig] = s default: // The two info samples have the same timestamp - conflict. ev.errorf("found duplicate series for info metric: existing %s @ %d, new %s @ %d", existing.Metric.String(), existingOrigT, s.Metric.String(), origT) } } else { - enh.rightSigs[sig] = s + enh.rightStrSigs[sig] = s } } @@ -389,7 +389,7 @@ func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[u // For every info metric name, try to find an info series with the same signature. seenInfoMetrics := map[string]struct{}{} for infoName, sig := range baseSigs[hash] { - is, exists := enh.rightSigs[sig] + is, exists := enh.rightStrSigs[sig] if !exists { continue }