mirror of
https://github.com/google/go-jsonnet.git
synced 2025-09-29 09:21:03 +02:00
Port lexer changes from google/jsonnet (0c96da7 to 27ddf2c). Fixes #1.
This commit is contained in:
parent
2282fdf13e
commit
c3f136d987
68
lexer.go
68
lexer.go
@ -55,7 +55,6 @@ const (
|
||||
tokenBraceR
|
||||
tokenBracketL
|
||||
tokenBracketR
|
||||
tokenColon
|
||||
tokenComma
|
||||
tokenDollar
|
||||
tokenDot
|
||||
@ -101,7 +100,6 @@ var tokenKindStrings = []string{
|
||||
tokenBraceR: "\"}\"",
|
||||
tokenBracketL: "\"[\"",
|
||||
tokenBracketR: "\"]\"",
|
||||
tokenColon: "\":\"",
|
||||
tokenComma: "\",\"",
|
||||
tokenDollar: "\"$\"",
|
||||
tokenDot: "\".\"",
|
||||
@ -197,7 +195,7 @@ func isIdentifier(r rune) bool {
|
||||
|
||||
func isSymbol(r rune) bool {
|
||||
switch r {
|
||||
case '&', '|', '^', '=', '<', '>', '*', '/', '%', '#':
|
||||
case '!', '$', ':', '~', '+', '-', '&', '|', '^', '=', '<', '>', '*', '/', '%':
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@ -533,7 +531,7 @@ func (l *lexer) lexIdentifier() {
|
||||
}
|
||||
|
||||
// lexSymbol will lex a token that starts with a symbol. This could be a
|
||||
// comment, block quote or an operator. This function assumes that the next
|
||||
// C or C++ comment, block quote or an operator. This function assumes that the next
|
||||
// rune to be served by the lexer will be the first rune of the new token.
|
||||
func (l *lexer) lexSymbol() error {
|
||||
r := l.next()
|
||||
@ -550,16 +548,6 @@ func (l *lexer) lexSymbol() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r == '#' {
|
||||
l.resetTokenStart() // Throw out the leading #
|
||||
for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
|
||||
}
|
||||
// Leave the '\n' in the lexer to be fodder for the next round
|
||||
l.backup()
|
||||
l.addCommentFodder(fodderCommentHash)
|
||||
return nil
|
||||
}
|
||||
|
||||
if r == '/' && l.peek() == '*' {
|
||||
commentStartLoc := l.tokenStartLoc
|
||||
l.next() // consume the '*'
|
||||
@ -640,10 +628,39 @@ func (l *lexer) lexSymbol() error {
|
||||
|
||||
// Assume any string of symbols is a single operator.
|
||||
for r = l.next(); isSymbol(r); r = l.next() {
|
||||
|
||||
// The sequence "//" is not allowed inside an operator.
|
||||
if r == '/' && strings.HasPrefix(l.input[l.pos:], "/") {
|
||||
break
|
||||
}
|
||||
// The sequence "/*" is not allowed inside an operator.
|
||||
if r == '/' && strings.HasPrefix(l.input[l.pos:], "*") {
|
||||
break
|
||||
}
|
||||
// The sequence "|||" is not allowed inside an operator.
|
||||
if r == '|' && strings.HasPrefix(l.input[l.pos:], "||") {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
l.backup()
|
||||
|
||||
// Operators are not allowed to end with + - ~ ! unless they are one rune long.
|
||||
// So, wind it back if we need to, but stop at the first rune.
|
||||
// This relies on the hack that all operator symbols are ASCII and thus there is
|
||||
// no need to treat this substring as general UTF-8.
|
||||
for r = rune(l.input[l.pos - 1]); l.pos > l.tokenStart + 1; l.pos-- {
|
||||
switch r {
|
||||
case '+', '-', '~', '!':
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if l.input[l.tokenStart:l.pos] == "$" {
|
||||
l.emitToken(tokenDollar)
|
||||
} else {
|
||||
l.emitToken(tokenOperator)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -665,12 +682,8 @@ func lex(fn string, input string) (tokens, error) {
|
||||
l.emitToken(tokenBracketL)
|
||||
case ']':
|
||||
l.emitToken(tokenBracketR)
|
||||
case ':':
|
||||
l.emitToken(tokenColon)
|
||||
case ',':
|
||||
l.emitToken(tokenComma)
|
||||
case '$':
|
||||
l.emitToken(tokenDollar)
|
||||
case '.':
|
||||
l.emitToken(tokenDot)
|
||||
case '(':
|
||||
@ -680,15 +693,6 @@ func lex(fn string, input string) (tokens, error) {
|
||||
case ';':
|
||||
l.emitToken(tokenSemicolon)
|
||||
|
||||
// Operators
|
||||
case '!':
|
||||
if l.peek() == '=' {
|
||||
_ = l.next()
|
||||
}
|
||||
l.emitToken(tokenOperator)
|
||||
case '~', '+', '-':
|
||||
l.emitToken(tokenOperator)
|
||||
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
l.backup()
|
||||
err = l.lexNumber()
|
||||
@ -733,6 +737,14 @@ func lex(fn string, input string) (tokens, error) {
|
||||
r = l.next()
|
||||
}
|
||||
}
|
||||
case '#':
|
||||
l.resetTokenStart() // Throw out the leading #
|
||||
for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
|
||||
}
|
||||
// Leave the '\n' in the lexer to be fodder for the next round
|
||||
l.backup()
|
||||
l.addCommentFodder(fodderCommentHash)
|
||||
|
||||
default:
|
||||
if isIdentifierFirst(r) {
|
||||
l.backup()
|
||||
|
@ -38,7 +38,12 @@ var lexTests = []lexTest{
|
||||
{"brace R", "}", tokens{{kind: tokenBraceR, data: "}"}}, ""},
|
||||
{"bracket L", "[", tokens{{kind: tokenBracketL, data: "["}}, ""},
|
||||
{"bracket R", "]", tokens{{kind: tokenBracketR, data: "]"}}, ""},
|
||||
{"colon", ":", tokens{{kind: tokenColon, data: ":"}}, ""},
|
||||
{"colon", ":", tokens{{kind: tokenOperator, data: ":"}}, ""},
|
||||
{"colon2", "::", tokens{{kind: tokenOperator, data: "::"}}, ""},
|
||||
{"colon3", ":::", tokens{{kind: tokenOperator, data: ":::"}}, ""},
|
||||
{"arrow right", "->", tokens{{kind: tokenOperator, data: "->"}}, ""},
|
||||
{"less than minus", "<-", tokens{{kind: tokenOperator, data: "<"},
|
||||
{kind: tokenOperator, data: "-"}}, ""},
|
||||
{"comma", ",", tokens{{kind: tokenComma, data: ","}}, ""},
|
||||
{"dollar", "$", tokens{{kind: tokenDollar, data: "$"}}, ""},
|
||||
{"dot", ".", tokens{{kind: tokenDot, data: "."}}, ""},
|
||||
|
Loading…
x
Reference in New Issue
Block a user