From 2aaeefe8e15a8ac81419342c431630c2de0ff0a9 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Wed, 13 Aug 2025 15:52:52 +0200 Subject: [PATCH] fix(parser): wrong end position aggregate expression (#17031) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(parser): wrong end position aggregate expression Fixes: https://github.com/prometheus/prometheus/issues/16053 The position range of nested aggregate expression was wrong, for the expression "(sum(foo))" the position of "sum(foo)" should be 1-9, but the parser could not decide the end of the expression on pos 9, instead it read ahead to pos 10 and then emitted the aggregate. But we only kept the last closing position (10) and wrote that into the aggregate. The reason for this is that the parser cannot know from "(sum(foo)" alone if the aggregate is finished. It could be finished as in "(sum(foo))" but equally it could continue with group modifier as "(sum(foo) by (bar))". Previous fix in #16041 tried to keep track of parenthesis, but that is complicated because the error happens after closing two parenthesis. That fix introduced new bugs. This fix now addresses the issue directly. Since we have to step outside the parser state machine anyway, we can just add an algorithm to detect and fix the issue. That's Lexer.findPrevRightParen(). Signed-off-by: György Krajcsovits --- promql/engine_test.go | 2 +- promql/parser/generated_parser.y | 8 ++--- promql/parser/generated_parser.y.go | 8 ++--- promql/parser/lex.go | 31 ++++++++++++++++++ promql/parser/parse.go | 5 ++- promql/parser/parse_test.go | 50 ++++++++++++++++++++++++++++- 6 files changed, 93 insertions(+), 11 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index 67d5fbb5f0..b6d9c3f1a4 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -2890,7 +2890,7 @@ func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) { }, PosRange: posrange.PositionRange{ Start: 29, - End: 52, // TODO(krajorama): this should be 51. https://github.com/prometheus/prometheus/issues/16053 + End: 51, }, }, }, diff --git a/promql/parser/generated_parser.y b/promql/parser/generated_parser.y index 474eb74d1a..61e11ad3cd 100644 --- a/promql/parser/generated_parser.y +++ b/promql/parser/generated_parser.y @@ -244,15 +244,15 @@ expr : */ aggregate_expr : aggregate_op aggregate_modifier function_call_body - { $$ = yylex.(*parser).newAggregateExpr($1, $2, $3) } + { $$ = yylex.(*parser).newAggregateExpr($1, $2, $3, false) } | aggregate_op function_call_body aggregate_modifier - { $$ = yylex.(*parser).newAggregateExpr($1, $3, $2) } + { $$ = yylex.(*parser).newAggregateExpr($1, $3, $2, false) } | aggregate_op function_call_body - { $$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, $2) } + { $$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, $2, true) } | aggregate_op error { yylex.(*parser).unexpected("aggregation",""); - $$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, Expressions{}) + $$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, Expressions{}, false) } ; diff --git a/promql/parser/generated_parser.y.go b/promql/parser/generated_parser.y.go index 7037245a64..f0b5b66a17 100644 --- a/promql/parser/generated_parser.y.go +++ b/promql/parser/generated_parser.y.go @@ -1122,23 +1122,23 @@ yydefault: case 21: yyDollar = yyS[yypt-3 : yypt+1] { - yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[2].node, yyDollar[3].node) + yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[2].node, yyDollar[3].node, false) } case 22: yyDollar = yyS[yypt-3 : yypt+1] { - yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[3].node, yyDollar[2].node) + yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[3].node, yyDollar[2].node, false) } case 23: yyDollar = yyS[yypt-2 : yypt+1] { - yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, &AggregateExpr{}, yyDollar[2].node) + yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, &AggregateExpr{}, yyDollar[2].node, true) } case 24: yyDollar = yyS[yypt-2 : yypt+1] { yylex.(*parser).unexpected("aggregation", "") - yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, &AggregateExpr{}, Expressions{}) + yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, &AggregateExpr{}, Expressions{}, false) } case 25: yyDollar = yyS[yypt-2 : yypt+1] diff --git a/promql/parser/lex.go b/promql/parser/lex.go index 2b3eecbadd..2a48942956 100644 --- a/promql/parser/lex.go +++ b/promql/parser/lex.go @@ -1185,3 +1185,34 @@ func lexDurationExpr(l *Lexer) stateFn { return l.errorf("unexpected character in duration expression: %q", r) } } + +// findPrevRightParen finds the previous right parenthesis. +// Use in case when the parser had to read ahead to the find the next right +// parenthesis to decide whether to continue and lost track of the previous right +// parenthesis position. +// Only use when outside string literals as those can have runes made up of +// multiple bytes, which would break the position calculation. +// Falls back to the input start position on any problem. +// https://github.com/prometheus/prometheus/issues/16053 +func (l *Lexer) findPrevRightParen(fallbackPos posrange.Pos) posrange.Pos { + // Early return on: + // - invalid fallback position, + // - not enough space for second right parenthesis, + // - last read position is after the end, since then we stopped due to the + // end of the input, not a parenthesis, or if last position doesn't hold + // right parenthesis, + // - last position doesn't hold right parenthesis. + if fallbackPos <= 0 || fallbackPos > posrange.Pos(len(l.input)) || l.lastPos <= 0 || l.lastPos >= posrange.Pos(len(l.input)) || l.input[l.lastPos] != ')' { + return fallbackPos + } + for i := l.lastPos - 1; i > 0; i-- { + switch { + case l.input[i] == ')': + return i + 1 + case isSpace(rune(l.input[i])): + default: + return fallbackPos + } + } + return fallbackPos +} diff --git a/promql/parser/parse.go b/promql/parser/parse.go index 013e3f321b..8f24ee34fe 100644 --- a/promql/parser/parse.go +++ b/promql/parser/parse.go @@ -432,7 +432,7 @@ func (*parser) assembleVectorSelector(vs *VectorSelector) { } } -func (p *parser) newAggregateExpr(op Item, modifier, args Node) (ret *AggregateExpr) { +func (p *parser) newAggregateExpr(op Item, modifier, args Node, overread bool) (ret *AggregateExpr) { ret = modifier.(*AggregateExpr) arguments := args.(Expressions) @@ -440,6 +440,9 @@ func (p *parser) newAggregateExpr(op Item, modifier, args Node) (ret *AggregateE Start: op.Pos, End: p.lastClosing, } + if overread { + ret.PosRange.End = p.lex.findPrevRightParen(p.lastClosing) + } ret.Op = op.Typ diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index 1a45b896de..851c175e15 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -4776,11 +4776,31 @@ var testExpr = []struct { End: 8, }, }, - PosRange: posrange.PositionRange{Start: 1, End: 10}, // TODO(krajorama): this should be 9. https://github.com/prometheus/prometheus/issues/16053 + PosRange: posrange.PositionRange{Start: 1, End: 9}, }, PosRange: posrange.PositionRange{Start: 0, End: 10}, }, }, + { + input: "(sum(foo) )", + expected: &ParenExpr{ + Expr: &AggregateExpr{ + Op: SUM, + Expr: &VectorSelector{ + Name: "foo", + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"), + }, + PosRange: posrange.PositionRange{ + Start: 5, + End: 8, + }, + }, + PosRange: posrange.PositionRange{Start: 1, End: 9}, + }, + PosRange: posrange.PositionRange{Start: 0, End: 11}, + }, + }, { input: "(sum(foo) by (bar))", expected: &ParenExpr{ @@ -4823,6 +4843,34 @@ var testExpr = []struct { PosRange: posrange.PositionRange{Start: 0, End: 20}, }, }, + { + input: "sum by (job)(rate(http_requests_total[30m]))", + expected: &AggregateExpr{ + Op: SUM, + Grouping: []string{"job"}, + Expr: &Call{ + Func: MustGetFunction("rate"), + Args: Expressions{ + &MatrixSelector{ + VectorSelector: &VectorSelector{ + Name: "http_requests_total", + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_requests_total"), + }, + PosRange: posrange.PositionRange{ + Start: 18, + End: 37, + }, + }, + Range: 30 * time.Minute, + EndPos: 42, + }, + }, + PosRange: posrange.PositionRange{Start: 13, End: 43}, + }, + PosRange: posrange.PositionRange{Start: 0, End: 44}, + }, + }, { input: "sum(rate(", fail: true,