diff --git a/promql/lex.go b/promql/lex.go index 9cdccd04b6..52957b2d93 100644 --- a/promql/lex.go +++ b/promql/lex.go @@ -16,6 +16,7 @@ package promql import ( "fmt" "strings" + "unicode" "unicode/utf8" ) @@ -465,6 +466,9 @@ func lexStatements(l *lexer) stateFn { case r == '"' || r == '\'': l.stringOpen = r return lexString + case r == '`': + l.stringOpen = r + return lexRawString case isAlpha(r) || r == ':': l.backup() return lexKeywordOrIdentifier @@ -523,6 +527,9 @@ func lexInsideBraces(l *lexer) stateFn { case r == '"' || r == '\'': l.stringOpen = r return lexString + case r == '`': + l.stringOpen = r + return lexRawString case r == '=': if l.next() == '~' { l.emit(itemEQLRegex) @@ -583,16 +590,79 @@ func lexValueSequence(l *lexer) stateFn { return lexValueSequence } +// lexEscape scans a string escape sequence. The initial escaping character (\) +// has already been seen. +// +// NOTE: This function as well as the helper function digitVal() and associated +// tests have been adapted from the corresponding functions in the "go/scanner" +// package of the Go standard library to work for Prometheus-style strings. +// None of the actual escaping/quoting logic was changed in this function - it +// was only modified to integrate with our lexer. +func lexEscape(l *lexer) { + var n int + var base, max uint32 + + ch := l.next() + switch ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen: + return + case '0', '1', '2', '3', '4', '5', '6', '7': + n, base, max = 3, 8, 255 + case 'x': + ch = l.next() + n, base, max = 2, 16, 255 + case 'u': + ch = l.next() + n, base, max = 4, 16, unicode.MaxRune + case 'U': + ch = l.next() + n, base, max = 8, 16, unicode.MaxRune + case eof: + l.errorf("escape sequence not terminated") + default: + l.errorf("unknown escape sequence %#U", ch) + } + + var x uint32 + for n > 0 { + d := uint32(digitVal(ch)) + if d >= base { + if ch == eof { + l.errorf("escape sequence not terminated") + } + l.errorf("illegal character %#U in escape sequence", ch) + } + x = x*base + d + ch = l.next() + n-- + } + + if x > max || 0xD800 <= x && x < 0xE000 { + l.errorf("escape sequence is an invalid Unicode code point") + } +} + +// digitVal returns the digit value of a rune or 16 in case the rune does not +// represent a valid digit. +func digitVal(ch rune) int { + switch { + case '0' <= ch && ch <= '9': + return int(ch - '0') + case 'a' <= ch && ch <= 'f': + return int(ch - 'a' + 10) + case 'A' <= ch && ch <= 'F': + return int(ch - 'A' + 10) + } + return 16 // Larger than any legal digit val. +} + // lexString scans a quoted string. The initial quote has already been seen. func lexString(l *lexer) stateFn { Loop: for { switch l.next() { case '\\': - if r := l.next(); r != eof && r != '\n' { - break - } - fallthrough + lexEscape(l) case eof, '\n': return l.errorf("unterminated quoted string") case l.stringOpen: @@ -603,6 +673,21 @@ Loop: return lexStatements } +// lexRawString scans a raw quoted string. The initial quote has already been seen. +func lexRawString(l *lexer) stateFn { +Loop: + for { + switch l.next() { + case eof: + return l.errorf("unterminated raw string") + case l.stringOpen: + break Loop + } + } + l.emit(itemString) + return lexStatements +} + // lexSpace scans a run of space characters. One space has already been seen. func lexSpace(l *lexer) stateFn { for isSpace(l.peek()) { diff --git a/promql/parse.go b/promql/parse.go index 283ecf7239..d871adc7a0 100644 --- a/promql/parse.go +++ b/promql/parse.go @@ -43,9 +43,9 @@ type ParseErr struct { func (e *ParseErr) Error() string { if e.Line == 0 { - return fmt.Sprintf("Parse error at char %d: %s", e.Pos, e.Err) + return fmt.Sprintf("parse error at char %d: %s", e.Pos, e.Err) } - return fmt.Sprintf("Parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err) + return fmt.Sprintf("parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err) } // ParseStmts parses the input and returns the resulting statements or any ocurring error. @@ -401,21 +401,21 @@ Loop: p.errorf("summary must not be defined twice") } hasSum = true - sum = trimOne(p.expect(itemString, ctx).val) + sum = p.unquoteString(p.expect(itemString, ctx).val) case itemDescription: if hasDesc { p.errorf("description must not be defined twice") } hasDesc = true - desc = trimOne(p.expect(itemString, ctx).val) + desc = p.unquoteString(p.expect(itemString, ctx).val) case itemRunbook: if hasRunbook { p.errorf("runbook must not be defined twice") } hasRunbook = true - runbook = trimOne(p.expect(itemString, ctx).val) + runbook = p.unquoteString(p.expect(itemString, ctx).val) default: p.backup() @@ -654,8 +654,7 @@ func (p *parser) primaryExpr() Expr { return &NumberLiteral{model.SampleValue(f)} case t.typ == itemString: - s := t.val[1 : len(t.val)-1] - return &StringLiteral{s} + return &StringLiteral{p.unquoteString(t.val)} case t.typ == itemLeftBrace: // Metric selector without metric name. @@ -843,7 +842,7 @@ func (p *parser) labelMatchers(operators ...itemType) metric.LabelMatchers { p.errorf("operator must be one of %q, is %q", operators, op) } - val := trimOne(p.expect(itemString, ctx).val) + val := p.unquoteString(p.expect(itemString, ctx).val) // Map the item to the respective match type. var matchType metric.MatchType @@ -1104,6 +1103,14 @@ func (p *parser) checkType(node Node) (typ model.ValueType) { return } +func (p *parser) unquoteString(s string) string { + unquoted, err := strutil.Unquote(s) + if err != nil { + p.errorf("error unquoting string %q: %s", s, err) + } + return unquoted +} + func parseDuration(ds string) (time.Duration, error) { dur, err := strutil.StringToDuration(ds) if err != nil { @@ -1114,14 +1121,3 @@ func parseDuration(ds string) (time.Duration, error) { } return dur, nil } - -// trimOne removes the first and last character from a string. -func trimOne(s string) string { - if len(s) > 0 { - s = s[1:] - } - if len(s) > 0 { - s = s[:len(s)-1] - } - return s -} diff --git a/promql/parse_test.go b/promql/parse_test.go index e270e00e47..ecbc95da87 100644 --- a/promql/parse_test.go +++ b/promql/parse_test.go @@ -1016,6 +1016,54 @@ var testExpr = []struct { fail: true, errMsg: `no valid expression found`, }, + // String quoting and escape sequence interpretation tests. + { + input: `"double-quoted string \" with escaped quote"`, + expected: &StringLiteral{ + Val: "double-quoted string \" with escaped quote", + }, + }, { + input: `'single-quoted string \' with escaped quote'`, + expected: &StringLiteral{ + Val: "single-quoted string ' with escaped quote", + }, + }, { + input: "`backtick-quoted string`", + expected: &StringLiteral{ + Val: "backtick-quoted string", + }, + }, { + input: `"\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111\U0001011111☺"`, + expected: &StringLiteral{ + Val: "\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111\U0001011111☺", + }, + }, { + input: `'\a\b\f\n\r\t\v\\\' - \xFF\377\u1234\U00010111\U0001011111☺'`, + expected: &StringLiteral{ + Val: "\a\b\f\n\r\t\v\\' - \xFF\377\u1234\U00010111\U0001011111☺", + }, + }, { + input: "`" + `\a\b\f\n\r\t\v\\\"\' - \xFF\377\u1234\U00010111\U0001011111☺` + "`", + expected: &StringLiteral{ + Val: `\a\b\f\n\r\t\v\\\"\' - \xFF\377\u1234\U00010111\U0001011111☺`, + }, + }, { + input: "`\\``", + fail: true, + errMsg: "could not parse remaining input", + }, { + input: `"\`, + fail: true, + errMsg: "escape sequence not terminated", + }, { + input: `"\c"`, + fail: true, + errMsg: "unknown escape sequence U+0063 'c'", + }, { + input: `"\x."`, + fail: true, + errMsg: "illegal character U+002E '.' in escape sequence", + }, } func TestParseExpressions(t *testing.T) { diff --git a/util/strutil/quote.go b/util/strutil/quote.go index 81be1c5b67..981ad473d2 100644 --- a/util/strutil/quote.go +++ b/util/strutil/quote.go @@ -28,7 +28,9 @@ var ErrSyntax = errors.New("invalid syntax") // NOTE: This function as well as the necessary helper functions below // (unquoteChar, contains, unhex) and associated tests have been adapted from // the corresponding functions in the "strconv" package of the Go standard -// library to work for Prometheus-style strings. +// library to work for Prometheus-style strings. Go's special-casing for single +// quotes was removed and single quoted strings are now treated the same as +// double quoted ones. func Unquote(s string) (t string, err error) { n := len(s) if n < 2 { @@ -103,7 +105,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, return rune(s[0]), false, s[1:], nil } - // hard case: c is backslash + // Hard case: c is backslash. if len(s) <= 1 { err = ErrSyntax return @@ -151,7 +153,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, } s = s[n:] if c == 'x' { - // single-byte string, possibly not UTF-8 + // Single-byte string, possibly not UTF-8. value = v break } @@ -167,7 +169,7 @@ func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err = ErrSyntax return } - for j := 0; j < 2; j++ { // one digit already; two more + for j := 0; j < 2; j++ { // One digit already; two more. x := rune(s[j]) - '0' if x < 0 || x > 7 { err = ErrSyntax diff --git a/util/strutil/quote_test.go b/util/strutil/quote_test.go index 35bd6842c8..0068ada0d2 100644 --- a/util/strutil/quote_test.go +++ b/util/strutil/quote_test.go @@ -110,7 +110,7 @@ func TestUnquote(t *testing.T) { } } - // run the quote tests too, backward + // Run the quote tests too, backward. for _, tt := range quotetests { if in, err := Unquote(tt.out); in != tt.in { t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in) diff --git a/web/api/legacy/api_test.go b/web/api/legacy/api_test.go index ce0ce91d8e..770e0af0e2 100644 --- a/web/api/legacy/api_test.go +++ b/web/api/legacy/api_test.go @@ -53,7 +53,7 @@ func TestQuery(t *testing.T) { { queryStr: "", status: http.StatusOK, - bodyRe: `{"type":"error","value":"Parse error at char 1: no expression found in input","version":1}`, + bodyRe: `{"type":"error","value":"parse error at char 1: no expression found in input","version":1}`, }, { queryStr: "expr=1.4", @@ -83,7 +83,7 @@ func TestQuery(t *testing.T) { { queryStr: "expr=(badexpression", status: http.StatusOK, - bodyRe: `{"type":"error","value":"Parse error at char 15: unclosed left parenthesis","version":1}`, + bodyRe: `{"type":"error","value":"parse error at char 15: unclosed left parenthesis","version":1}`, }, }