From b18b6cb3322a5329e67c18902b8e9d01fe1e3677 Mon Sep 17 00:00:00 2001
From: Tobias Guggenmos
Date: Thu, 9 Jan 2020 12:26:58 +0100
Subject: [PATCH] PromQL: Avoid lexer item copies and allocations (#6584)

* PromQL: Avoid lexer item copies and allocations

Signed-off-by: Tobias Guggenmos
---
 promql/lex.go        | 72 ++++++++++++++++++++++++--------------------
 promql/lex_test.go   | 18 +++++++++--
 promql/parse.go      | 46 ++++++++++++++--------------
 promql/parse_test.go |  2 +-
 4 files changed, 77 insertions(+), 61 deletions(-)

diff --git a/promql/lex.go b/promql/lex.go
index eb249fc704..96ef608a80 100644
--- a/promql/lex.go
+++ b/promql/lex.go
@@ -217,13 +217,14 @@ type Pos int
 
 // Lexer holds the state of the scanner.
 type Lexer struct {
-	input   string  // The string being scanned.
-	state   stateFn // The next lexing function to enter.
-	pos     Pos     // Current position in the input.
-	start   Pos     // Start position of this Item.
-	width   Pos     // Width of last rune read from input.
-	lastPos Pos     // Position of most recent Item returned by NextItem.
-	Items   []Item  // Slice buffer of scanned Items.
+	input       string  // The string being scanned.
+	state       stateFn // The next lexing function to enter.
+	pos         Pos     // Current position in the input.
+	start       Pos     // Start position of this Item.
+	width       Pos     // Width of last rune read from input.
+	lastPos     Pos     // Position of most recent Item returned by NextItem.
+	itemp       *Item   // Pointer to where the next scanned item should be placed.
+	scannedItem bool    // Set to true every time an item is scanned.
 
 	parenDepth  int  // Nesting depth of ( ) exprs.
 	braceOpen   bool // Whether a { is opened.
@@ -262,8 +263,9 @@ func (l *Lexer) backup() {
 
 // emit passes an Item back to the client.
 func (l *Lexer) emit(t ItemType) {
-	l.Items = append(l.Items, Item{t, l.start, l.input[l.start:l.pos]})
+	*l.itemp = Item{t, l.start, l.input[l.start:l.pos]}
 	l.start = l.pos
+	l.scannedItem = true
 }
 
 // ignore skips over the pending input before this point.
@@ -308,23 +310,26 @@ func (l *Lexer) linePosition() int {
 // errorf returns an error token and terminates the scan by passing
 // back a nil pointer that will be the next state, terminating l.NextItem.
 func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
-	l.Items = append(l.Items, Item{ERROR, l.start, fmt.Sprintf(format, args...)})
+	*l.itemp = Item{ERROR, l.start, fmt.Sprintf(format, args...)}
+	l.scannedItem = true
+
 	return nil
 }
 
-// NextItem returns the next Item from the input.
-func (l *Lexer) NextItem() Item {
-	for len(l.Items) == 0 {
-		if l.state != nil {
+// NextItem writes the next item to the provided address.
+func (l *Lexer) NextItem(itemp *Item) {
+	l.scannedItem = false
+	l.itemp = itemp
+
+	if l.state != nil {
+		for !l.scannedItem {
 			l.state = l.state(l)
-		} else {
-			l.emit(EOF)
 		}
+	} else {
+		l.emit(EOF)
 	}
-	Item := l.Items[0]
-	l.Items = l.Items[1:]
-	l.lastPos = Item.Pos
-	return Item
+
+	l.lastPos = l.itemp.Pos
 }
 
 // lex creates a new scanner for the input string.
@@ -336,13 +341,6 @@ func Lex(input string) *Lexer {
 	return l
 }
 
-// run runs the state machine for the lexer.
-func (l *Lexer) run() {
-	for l.state = lexStatements; l.state != nil; {
-		l.state = l.state(l)
-	}
-}
-
 // lineComment is the character that starts a line comment.
 const lineComment = "#"
 
@@ -442,7 +440,7 @@ func lexStatements(l *Lexer) stateFn {
 	case r == '{':
 		l.emit(LEFT_BRACE)
 		l.braceOpen = true
-		return lexInsideBraces(l)
+		return lexInsideBraces
 	case r == '[':
 		if l.bracketOpen {
 			return l.errorf("unexpected left bracket %q", r)
@@ -559,14 +557,14 @@ func lexValueSequence(l *Lexer) stateFn {
 // package of the Go standard library to work for Prometheus-style strings.
 // None of the actual escaping/quoting logic was changed in this function - it
 // was only modified to integrate with our lexer.
-func lexEscape(l *Lexer) {
+func lexEscape(l *Lexer) stateFn {
 	var n int
 	var base, max uint32
 
 	ch := l.next()
 	switch ch {
 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
-		return
+		return lexString
 	case '0', '1', '2', '3', '4', '5', '6', '7':
 		n, base, max = 3, 8, 255
 	case 'x':
@@ -580,8 +578,10 @@ func lexEscape(l *Lexer) {
 		n, base, max = 8, 16, unicode.MaxRune
 	case eof:
 		l.errorf("escape sequence not terminated")
+		return lexString
 	default:
 		l.errorf("unknown escape sequence %#U", ch)
+		return lexString
 	}
 
 	var x uint32
@@ -590,8 +590,10 @@ func lexEscape(l *Lexer) {
 		if d >= base {
 			if ch == eof {
 				l.errorf("escape sequence not terminated")
+				return lexString
 			}
 			l.errorf("illegal character %#U in escape sequence", ch)
+			return lexString
 		}
 		x = x*base + d
 		ch = l.next()
@@ -601,6 +603,7 @@ func lexEscape(l *Lexer) {
 	if x > max || 0xD800 <= x && x < 0xE000 {
 		l.errorf("escape sequence is an invalid Unicode code point")
 	}
+	return lexString
 }
 
 // digitVal returns the digit value of a rune or 16 in case the rune does not
@@ -631,9 +634,10 @@ Loop:
 	for {
 		switch l.next() {
 		case '\\':
-			lexEscape(l)
+			return lexEscape
 		case utf8.RuneError:
-			return l.errorf("invalid UTF-8 rune")
+			l.errorf("invalid UTF-8 rune")
+			return lexString
 		case eof, '\n':
 			return l.errorf("unterminated quoted string")
 		case l.stringOpen:
@@ -650,9 +654,11 @@ Loop:
 	for {
 		switch l.next() {
 		case utf8.RuneError:
-			return l.errorf("invalid UTF-8 rune")
+			l.errorf("invalid UTF-8 rune")
+			return lexRawString
 		case eof:
-			return l.errorf("unterminated raw string")
+			l.errorf("unterminated raw string")
+			return lexRawString
 		case l.stringOpen:
 			break Loop
 		}
diff --git a/promql/lex_test.go b/promql/lex_test.go
index 7d71bf3c56..4d6fc9c017 100644
--- a/promql/lex_test.go
+++ b/promql/lex_test.go
@@ -697,13 +697,25 @@ func TestLexer(t *testing.T) {
 					input:      test.input,
 					seriesDesc: test.seriesDesc,
 				}
-				l.run()
 
-				out := l.Items
+				var out []Item
+
+				for l.state = lexStatements; l.state != nil; {
+					out = append(out, Item{})
+
+					l.NextItem(&out[len(out)-1])
+				}
 
 				lastItem := out[len(out)-1]
 				if test.fail {
-					if lastItem.Typ != ERROR {
+					hasError := false
+					for _, item := range out {
+						if item.Typ == ERROR {
+							hasError = true
+						}
+
+					}
+					if !hasError {
 						t.Logf("%d: input %q", i, test.input)
 						t.Fatalf("expected lexing error but did not fail")
 					}
diff --git a/promql/parse.go b/promql/parse.go
index 0c4f634818..07feac8751 100644
--- a/promql/parse.go
+++ b/promql/parse.go
@@ -29,12 +29,13 @@ import (
 )
 
 type parser struct {
-	lex   *Lexer
-	token Item
+	lex *Lexer
 
-	inject    Item
+	inject    ItemType
 	injecting bool
 
+	yyParser yyParserImpl
+
 	generatedParserResult interface{}
 }
 
@@ -129,21 +130,6 @@ func (p *parser) typecheck(node Node) (err error) {
 	return nil
 }
 
-// next returns the next token.
-func (p *parser) next() Item {
-	t := p.lex.NextItem()
-	// Skip comments.
-	for t.Typ == COMMENT {
-		t = p.lex.NextItem()
-	}
-	p.token = t
-
-	if p.token.Typ == ERROR {
-		p.errorf("%s", p.token.Val)
-	}
-	return p.token
-}
-
 // errorf formats the error and terminates processing.
 func (p *parser) errorf(format string, args ...interface{}) {
 	p.error(errors.Errorf(format, args...))
@@ -169,7 +155,7 @@ func (p *parser) unexpected(context string, expected string) {
 	var errMsg strings.Builder
 
 	errMsg.WriteString("unexpected ")
-	errMsg.WriteString(p.token.desc())
+	errMsg.WriteString(p.yyParser.lval.item.desc())
 
 	if context != "" {
 		errMsg.WriteString(" in ")
@@ -211,16 +197,28 @@ func (p *parser) recover(errp *error) {
 //
 // For more information, see https://godoc.org/golang.org/x/tools/cmd/goyacc.
 func (p *parser) Lex(lval *yySymType) int {
+	var typ ItemType
+
 	if p.injecting {
-		lval.item = p.inject
 		p.injecting = false
+		return int(p.inject)
 	} else {
-		lval.item = p.next()
+		// Skip comments.
+		for {
+			p.lex.NextItem(&lval.item)
+			typ = lval.item.Typ
+			if typ != COMMENT {
+				break
+			}
+		}
 	}
 
-	typ := lval.item.Typ
+	if typ == ERROR {
+		p.errorf("%s", lval.item.Val)
+	}
 
 	if typ == EOF {
+		lval.item.Typ = EOF
 		p.InjectItem(0)
 	}
 
@@ -251,7 +249,7 @@ func (p *parser) InjectItem(typ ItemType) {
 		panic("cannot inject symbol that isn't start symbol")
 	}
 
-	p.inject = Item{Typ: typ}
+	p.inject = typ
 	p.injecting = true
 }
 func (p *parser) newBinaryExpression(lhs Node, op Item, modifiers Node, rhs Node) *BinaryExpr {
@@ -525,7 +523,7 @@ func parseDuration(ds string) (time.Duration, error) {
 func (p *parser) parseGenerated(startSymbol ItemType) interface{} {
 	p.InjectItem(startSymbol)
 
-	yyParse(p)
+	p.yyParser.Parse(p)
 
 	return p.generatedParserResult
 
diff --git a/promql/parse_test.go b/promql/parse_test.go
index 29991524e9..d663cc44c4 100644
--- a/promql/parse_test.go
+++ b/promql/parse_test.go
@@ -233,7 +233,7 @@ var testExpr = []struct {
 	}, {
 		input:  "(1))",
 		fail:   true,
-		errMsg: "unexpected \")\"",
+		errMsg: "unexpected right parenthesis ')'",
 	}, {
 		input:  "((1)",
 		fail:   true,