mirror of
https://github.com/google/go-jsonnet.git
synced 2025-09-29 17:31:02 +02:00
Merge pull request #5 from sparkprime/lexer_changes
Port lexer changes from google/jsonnet 0c96da7 to 27ddf2c. Fix #1.
This commit is contained in:
commit
04c51f7034
70
lexer.go
70
lexer.go
@ -55,7 +55,6 @@ const (
|
|||||||
tokenBraceR
|
tokenBraceR
|
||||||
tokenBracketL
|
tokenBracketL
|
||||||
tokenBracketR
|
tokenBracketR
|
||||||
tokenColon
|
|
||||||
tokenComma
|
tokenComma
|
||||||
tokenDollar
|
tokenDollar
|
||||||
tokenDot
|
tokenDot
|
||||||
@ -101,7 +100,6 @@ var tokenKindStrings = []string{
|
|||||||
tokenBraceR: "\"}\"",
|
tokenBraceR: "\"}\"",
|
||||||
tokenBracketL: "\"[\"",
|
tokenBracketL: "\"[\"",
|
||||||
tokenBracketR: "\"]\"",
|
tokenBracketR: "\"]\"",
|
||||||
tokenColon: "\":\"",
|
|
||||||
tokenComma: "\",\"",
|
tokenComma: "\",\"",
|
||||||
tokenDollar: "\"$\"",
|
tokenDollar: "\"$\"",
|
||||||
tokenDot: "\".\"",
|
tokenDot: "\".\"",
|
||||||
@ -197,7 +195,7 @@ func isIdentifier(r rune) bool {
|
|||||||
|
|
||||||
func isSymbol(r rune) bool {
|
func isSymbol(r rune) bool {
|
||||||
switch r {
|
switch r {
|
||||||
case '&', '|', '^', '=', '<', '>', '*', '/', '%', '#':
|
case '!', '$', ':', '~', '+', '-', '&', '|', '^', '=', '<', '>', '*', '/', '%':
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@ -533,7 +531,7 @@ func (l *lexer) lexIdentifier() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// lexSymbol will lex a token that starts with a symbol. This could be a
|
// lexSymbol will lex a token that starts with a symbol. This could be a
|
||||||
// comment, block quote or an operator. This function assumes that the next
|
// C or C++ comment, block quote or an operator. This function assumes that the next
|
||||||
// rune to be served by the lexer will be the first rune of the new token.
|
// rune to be served by the lexer will be the first rune of the new token.
|
||||||
func (l *lexer) lexSymbol() error {
|
func (l *lexer) lexSymbol() error {
|
||||||
r := l.next()
|
r := l.next()
|
||||||
@ -550,16 +548,6 @@ func (l *lexer) lexSymbol() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if r == '#' {
|
|
||||||
l.resetTokenStart() // Throw out the leading #
|
|
||||||
for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
|
|
||||||
}
|
|
||||||
// Leave the '\n' in the lexer to be fodder for the next round
|
|
||||||
l.backup()
|
|
||||||
l.addCommentFodder(fodderCommentHash)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if r == '/' && l.peek() == '*' {
|
if r == '/' && l.peek() == '*' {
|
||||||
commentStartLoc := l.tokenStartLoc
|
commentStartLoc := l.tokenStartLoc
|
||||||
l.next() // consume the '*'
|
l.next() // consume the '*'
|
||||||
@ -640,10 +628,39 @@ func (l *lexer) lexSymbol() error {
|
|||||||
|
|
||||||
// Assume any string of symbols is a single operator.
|
// Assume any string of symbols is a single operator.
|
||||||
for r = l.next(); isSymbol(r); r = l.next() {
|
for r = l.next(); isSymbol(r); r = l.next() {
|
||||||
|
// The sequence "//" is not allowed inside an operator (it starts a comment).
|
||||||
|
if r == '/' && strings.HasPrefix(l.input[l.pos:], "/") {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// The sequence "/*" is not allowed inside an operator (it starts a comment).
|
||||||
|
if r == '/' && strings.HasPrefix(l.input[l.pos:], "*") {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// The sequence "|||" is not allowed inside an operator (it starts a text block).
|
||||||
|
if r == '|' && strings.HasPrefix(l.input[l.pos:], "||") {
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
l.backup()
|
l.backup()
|
||||||
l.emitToken(tokenOperator)
|
|
||||||
|
// Operators are not allowed to end with + - ~ ! unless they are one rune long.
|
||||||
|
// So, wind it back if we need to, but stop at the first rune.
|
||||||
|
// This relies on the hack that all operator symbols are ASCII and thus there is
|
||||||
|
// no need to treat this substring as general UTF-8.
|
||||||
|
for r = rune(l.input[l.pos - 1]); l.pos > l.tokenStart + 1; l.pos-- {
|
||||||
|
switch r {
|
||||||
|
case '+', '-', '~', '!':
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if l.input[l.tokenStart:l.pos] == "$" {
|
||||||
|
l.emitToken(tokenDollar)
|
||||||
|
} else {
|
||||||
|
l.emitToken(tokenOperator)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -665,12 +682,8 @@ func lex(fn string, input string) (tokens, error) {
|
|||||||
l.emitToken(tokenBracketL)
|
l.emitToken(tokenBracketL)
|
||||||
case ']':
|
case ']':
|
||||||
l.emitToken(tokenBracketR)
|
l.emitToken(tokenBracketR)
|
||||||
case ':':
|
|
||||||
l.emitToken(tokenColon)
|
|
||||||
case ',':
|
case ',':
|
||||||
l.emitToken(tokenComma)
|
l.emitToken(tokenComma)
|
||||||
case '$':
|
|
||||||
l.emitToken(tokenDollar)
|
|
||||||
case '.':
|
case '.':
|
||||||
l.emitToken(tokenDot)
|
l.emitToken(tokenDot)
|
||||||
case '(':
|
case '(':
|
||||||
@ -680,15 +693,6 @@ func lex(fn string, input string) (tokens, error) {
|
|||||||
case ';':
|
case ';':
|
||||||
l.emitToken(tokenSemicolon)
|
l.emitToken(tokenSemicolon)
|
||||||
|
|
||||||
// Operators
|
|
||||||
case '!':
|
|
||||||
if l.peek() == '=' {
|
|
||||||
_ = l.next()
|
|
||||||
}
|
|
||||||
l.emitToken(tokenOperator)
|
|
||||||
case '~', '+', '-':
|
|
||||||
l.emitToken(tokenOperator)
|
|
||||||
|
|
||||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||||
l.backup()
|
l.backup()
|
||||||
err = l.lexNumber()
|
err = l.lexNumber()
|
||||||
@ -733,6 +737,14 @@ func lex(fn string, input string) (tokens, error) {
|
|||||||
r = l.next()
|
r = l.next()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case '#':
|
||||||
|
l.resetTokenStart() // Throw out the leading #
|
||||||
|
for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
|
||||||
|
}
|
||||||
|
// Leave the '\n' in the lexer to be fodder for the next round
|
||||||
|
l.backup()
|
||||||
|
l.addCommentFodder(fodderCommentHash)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if isIdentifierFirst(r) {
|
if isIdentifierFirst(r) {
|
||||||
l.backup()
|
l.backup()
|
||||||
|
@ -38,7 +38,12 @@ var lexTests = []lexTest{
|
|||||||
{"brace R", "}", tokens{{kind: tokenBraceR, data: "}"}}, ""},
|
{"brace R", "}", tokens{{kind: tokenBraceR, data: "}"}}, ""},
|
||||||
{"bracket L", "[", tokens{{kind: tokenBracketL, data: "["}}, ""},
|
{"bracket L", "[", tokens{{kind: tokenBracketL, data: "["}}, ""},
|
||||||
{"bracket R", "]", tokens{{kind: tokenBracketR, data: "]"}}, ""},
|
{"bracket R", "]", tokens{{kind: tokenBracketR, data: "]"}}, ""},
|
||||||
{"colon", ":", tokens{{kind: tokenColon, data: ":"}}, ""},
|
{"colon", ":", tokens{{kind: tokenOperator, data: ":"}}, ""},
|
||||||
|
{"colon2", "::", tokens{{kind: tokenOperator, data: "::"}}, ""},
|
||||||
|
{"colon3", ":::", tokens{{kind: tokenOperator, data: ":::"}}, ""},
|
||||||
|
{"arrow right", "->", tokens{{kind: tokenOperator, data: "->"}}, ""},
|
||||||
|
{"less than minus", "<-", tokens{{kind: tokenOperator, data: "<"},
|
||||||
|
{kind: tokenOperator, data: "-"}}, ""},
|
||||||
{"comma", ",", tokens{{kind: tokenComma, data: ","}}, ""},
|
{"comma", ",", tokens{{kind: tokenComma, data: ","}}, ""},
|
||||||
{"dollar", "$", tokens{{kind: tokenDollar, data: "$"}}, ""},
|
{"dollar", "$", tokens{{kind: tokenDollar, data: "$"}}, ""},
|
||||||
{"dot", ".", tokens{{kind: tokenDot, data: "."}}, ""},
|
{"dot", ".", tokens{{kind: tokenDot, data: "."}}, ""},
|
||||||
|
@ -144,7 +144,7 @@ func (p *parser) parse(prec precedence) (astNode, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
var msg astNode
|
var msg astNode
|
||||||
if p.peek().kind == tokenColon {
|
if p.peek().kind == tokenOperator && p.peek().data == ":" {
|
||||||
p.pop()
|
p.pop()
|
||||||
msg, err = p.parse(maxPrecedence)
|
msg, err = p.parse(maxPrecedence)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user