fix(parser): wrong end position aggregate expression (#17031)

* fix(parser): wrong end position aggregate expression

Fixes: https://github.com/prometheus/prometheus/issues/16053

The position range of nested aggregate expression was wrong, for the
expression "(sum(foo))" the position of "sum(foo)" should be 1-9, but
the parser could not decide the end of the expression on pos 9, instead
it read ahead to pos 10 and then emitted the aggregate. But we only
kept the last closing position (10) and wrote that into the aggregate.

The reason for this is that the parser cannot know from "(sum(foo)" alone
if the aggregate is finished. It could be finished as in "(sum(foo))" but
equally it could continue with group modifier as "(sum(foo) by (bar))".

Previous fix in #16041 tried to keep track of parenthesis, but that is
complicated because the error happens after closing two parenthesis. That
fix introduced new bugs.

This fix now addresses the issue directly. Since we have to step outside
the parser state machine anyway, we can just add an algorithm to
detect and fix the issue. That's Lexer.findPrevRightParen().

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
This commit is contained in:
George Krajcsovits 2025-08-13 15:52:52 +02:00 committed by GitHub
parent dd6ad8ec4c
commit 2aaeefe8e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 93 additions and 11 deletions

View File

@ -2890,7 +2890,7 @@ func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) {
},
PosRange: posrange.PositionRange{
Start: 29,
End: 52, // TODO(krajorama): this should be 51. https://github.com/prometheus/prometheus/issues/16053
End: 51,
},
},
},

View File

@ -244,15 +244,15 @@ expr :
*/
aggregate_expr : aggregate_op aggregate_modifier function_call_body
{ $$ = yylex.(*parser).newAggregateExpr($1, $2, $3) }
{ $$ = yylex.(*parser).newAggregateExpr($1, $2, $3, false) }
| aggregate_op function_call_body aggregate_modifier
{ $$ = yylex.(*parser).newAggregateExpr($1, $3, $2) }
{ $$ = yylex.(*parser).newAggregateExpr($1, $3, $2, false) }
| aggregate_op function_call_body
{ $$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, $2) }
{ $$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, $2, true) }
| aggregate_op error
{
yylex.(*parser).unexpected("aggregation","");
$$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, Expressions{})
$$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, Expressions{}, false)
}
;

View File

@ -1122,23 +1122,23 @@ yydefault:
case 21:
yyDollar = yyS[yypt-3 : yypt+1]
{
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[2].node, yyDollar[3].node)
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[2].node, yyDollar[3].node, false)
}
case 22:
yyDollar = yyS[yypt-3 : yypt+1]
{
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[3].node, yyDollar[2].node)
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[3].node, yyDollar[2].node, false)
}
case 23:
yyDollar = yyS[yypt-2 : yypt+1]
{
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, &AggregateExpr{}, yyDollar[2].node)
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, &AggregateExpr{}, yyDollar[2].node, true)
}
case 24:
yyDollar = yyS[yypt-2 : yypt+1]
{
yylex.(*parser).unexpected("aggregation", "")
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, &AggregateExpr{}, Expressions{})
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, &AggregateExpr{}, Expressions{}, false)
}
case 25:
yyDollar = yyS[yypt-2 : yypt+1]

View File

@ -1185,3 +1185,34 @@ func lexDurationExpr(l *Lexer) stateFn {
return l.errorf("unexpected character in duration expression: %q", r)
}
}
// findPrevRightParen finds the previous right parenthesis.
// Use in case when the parser had to read ahead to the find the next right
// parenthesis to decide whether to continue and lost track of the previous right
// parenthesis position.
// Only use when outside string literals as those can have runes made up of
// multiple bytes, which would break the position calculation.
// Falls back to the input start position on any problem.
// https://github.com/prometheus/prometheus/issues/16053
func (l *Lexer) findPrevRightParen(fallbackPos posrange.Pos) posrange.Pos {
// Early return on:
// - invalid fallback position,
// - not enough space for second right parenthesis,
// - last read position is after the end, since then we stopped due to the
// end of the input, not a parenthesis, or if last position doesn't hold
// right parenthesis,
// - last position doesn't hold right parenthesis.
if fallbackPos <= 0 || fallbackPos > posrange.Pos(len(l.input)) || l.lastPos <= 0 || l.lastPos >= posrange.Pos(len(l.input)) || l.input[l.lastPos] != ')' {
return fallbackPos
}
for i := l.lastPos - 1; i > 0; i-- {
switch {
case l.input[i] == ')':
return i + 1
case isSpace(rune(l.input[i])):
default:
return fallbackPos
}
}
return fallbackPos
}

View File

@ -432,7 +432,7 @@ func (*parser) assembleVectorSelector(vs *VectorSelector) {
}
}
func (p *parser) newAggregateExpr(op Item, modifier, args Node) (ret *AggregateExpr) {
func (p *parser) newAggregateExpr(op Item, modifier, args Node, overread bool) (ret *AggregateExpr) {
ret = modifier.(*AggregateExpr)
arguments := args.(Expressions)
@ -440,6 +440,9 @@ func (p *parser) newAggregateExpr(op Item, modifier, args Node) (ret *AggregateE
Start: op.Pos,
End: p.lastClosing,
}
if overread {
ret.PosRange.End = p.lex.findPrevRightParen(p.lastClosing)
}
ret.Op = op.Typ

View File

@ -4776,11 +4776,31 @@ var testExpr = []struct {
End: 8,
},
},
PosRange: posrange.PositionRange{Start: 1, End: 10}, // TODO(krajorama): this should be 9. https://github.com/prometheus/prometheus/issues/16053
PosRange: posrange.PositionRange{Start: 1, End: 9},
},
PosRange: posrange.PositionRange{Start: 0, End: 10},
},
},
{
input: "(sum(foo) )",
expected: &ParenExpr{
Expr: &AggregateExpr{
Op: SUM,
Expr: &VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"),
},
PosRange: posrange.PositionRange{
Start: 5,
End: 8,
},
},
PosRange: posrange.PositionRange{Start: 1, End: 9},
},
PosRange: posrange.PositionRange{Start: 0, End: 11},
},
},
{
input: "(sum(foo) by (bar))",
expected: &ParenExpr{
@ -4823,6 +4843,34 @@ var testExpr = []struct {
PosRange: posrange.PositionRange{Start: 0, End: 20},
},
},
{
input: "sum by (job)(rate(http_requests_total[30m]))",
expected: &AggregateExpr{
Op: SUM,
Grouping: []string{"job"},
Expr: &Call{
Func: MustGetFunction("rate"),
Args: Expressions{
&MatrixSelector{
VectorSelector: &VectorSelector{
Name: "http_requests_total",
LabelMatchers: []*labels.Matcher{
MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_requests_total"),
},
PosRange: posrange.PositionRange{
Start: 18,
End: 37,
},
},
Range: 30 * time.Minute,
EndPos: 42,
},
},
PosRange: posrange.PositionRange{Start: 13, End: 43},
},
PosRange: posrange.PositionRange{Start: 0, End: 44},
},
},
{
input: "sum(rate(",
fail: true,