From c64dd612efadf8ccafc4bf985bd866ade719a727 Mon Sep 17 00:00:00 2001 From: zenador Date: Sun, 16 Nov 2025 04:07:36 +0800 Subject: [PATCH] PromQL: Fix bug with inconsistent results for queries with OR expression and EnableDelayedNameRemoval (#17161) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jeanette Tan Signed-off-by: zenador Co-authored-by: Björn Rabenstein --- docs/feature_flags.md | 2 ++ promql/engine.go | 8 ++++- .../testdata/name_label_dropping.test | 35 +++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/docs/feature_flags.md b/docs/feature_flags.md index 384b124c6a..f9f27bfb7f 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -150,6 +150,8 @@ These queries are rare to occur and easy to fix. (In the above example, removing `by (__name__)` doesn't change anything without the feature flag and fixes the possible problem with the feature flag.) +It is possible to craft a query that aggregates by `__name__` and puts samples with and without delayed name removal into the same group. In that case, the name is removed from the affected group. Note that this case hardly occurs in queries that fulfill a practical purpose. + ## Auto Reload Config `--enable-feature=auto-reload-config` diff --git a/promql/engine.go b/promql/engine.go index 74864bdcae..67f9b9e3ba 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3185,6 +3185,7 @@ type groupedAggregation struct { incrementalMean bool // True after reverting to incremental calculation of the mean value. counterResetSeen bool // Counter reset hint CounterReset seen. Currently only used for histogram samples. notCounterResetSeen bool // Counter reset hint NotCounterReset seen. Currently only used for histogram samples. + dropName bool // True if any sample in this group has DropName set. } // aggregation evaluates sum, avg, count, stdvar, stddev or quantile at one timestep on inputMatrix. @@ -3213,6 +3214,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix floatMean: f, incompatibleHistograms: false, groupCount: 1, + dropName: inputMatrix[si].DropName, } switch op { case parser.AVG, parser.SUM: @@ -3269,6 +3271,10 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix continue } + if inputMatrix[si].DropName { + group.dropName = true + } + switch op { case parser.SUM: if h != nil { @@ -3507,7 +3513,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix ss := &outputMatrix[ri] addToSeries(ss, enh.Ts, aggr.floatValue, aggr.histogramValue, numSteps) - ss.DropName = inputMatrix[ri].DropName + ss.DropName = aggr.dropName } return annos diff --git a/promql/promqltest/testdata/name_label_dropping.test b/promql/promqltest/testdata/name_label_dropping.test index 3682021ba9..3a6f4098df 100644 --- a/promql/promqltest/testdata/name_label_dropping.test +++ b/promql/promqltest/testdata/name_label_dropping.test @@ -91,3 +91,38 @@ eval instant at 10m topk(10, sum by (__name__, env) (metric_total{env="1"})) eval instant at 10m topk(10, sum by (__name__, env) (rate(metric_total{env="1"}[10m]))) {env="1"} 0.2 + +clear + +# More testing for __name__ label drop with different input series. +load 1m + metric_total{env="1"} 0+1x10 + metric_total{env="2"} 0+3x10 + +# Metric name is preserved as there is no function that drops it. +eval instant at 10m sum by (__name__) (metric_total{env="1"}) + metric_total 10 + +# Metric name is dropped at the end because of rate and because there is no label function to preserve it. +eval instant at 10m sum by (__name__) (rate(metric_total{env="2"}[5m])) + {} 0.05 + +# Metric name is preserved with label_replace even though it would have been dropped with rate. +eval instant at 10m label_replace(sum by (__name__) (rate(metric_total{env="2"}[5m])), "__name__", "$1", "__name__", "(.+)") + metric_total 0.05 + +# Combining the above cases in an OR expression, we drop the name if any of the series drops it. +eval instant at 10m sum by (__name__) (metric_total{env="1"} or rate(metric_total{env="2"}[5m])) + {} 10.05 + +# Changing the order of the OR expression should not change the result. +eval instant at 10m sum by (__name__) (rate(metric_total{env="2"}[5m]) or metric_total{env="1"}) + {} 10.05 + +# With non-matching first selector, we use the second to determine if __name__ is dropped. +eval instant at 10m sum by (__name__) (metric_total{env="3"} or rate(metric_total{env="2"}[5m])) + {} 0.05 + +# Same as above, but with reversed order. +eval instant at 10m sum by (__name__) (rate(metric_total{env="3"}[5m]) or metric_total{env="1"}) + metric_total 10