mirror of
https://github.com/google/go-jsonnet.git
synced 2025-08-12 09:17:11 +02:00
Initial separator lexing.
This commit is contained in:
parent
2b4d7535f5
commit
ffd82ef1df
@ -358,7 +358,21 @@ func (l *lexer) resetTokenStart() {
|
||||
l.tokenStartLoc = l.location()
|
||||
}
|
||||
|
||||
// tokenKindPostprocessors defines a transformation of the lexed token string
|
||||
// before it is stored in the tokens list. It is optional for each token kind.
|
||||
var tokenKindPostprocessors = map[tokenKind]func(string) string{
|
||||
tokenNumber: func(s string) string {
|
||||
// Get rid of underscore digit separators.
|
||||
return strings.ReplaceAll(s, "_", "")
|
||||
},
|
||||
}
|
||||
|
||||
func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) {
|
||||
// Run the postprocessor if the token kind has one defined.
|
||||
if pp, ok := tokenKindPostprocessors[kind]; ok {
|
||||
data = pp(data)
|
||||
}
|
||||
|
||||
l.tokens = append(l.tokens, token{
|
||||
kind: kind,
|
||||
fodder: l.fodder,
|
||||
@ -451,7 +465,7 @@ func (l *lexer) lexUntilNewline() (string, int, int) {
|
||||
// that the next rune to be served by the lexer will be a leading digit.
|
||||
func (l *lexer) lexNumber() error {
|
||||
// This function should be understood with reference to the linked image:
|
||||
// http://www.json.org/number.gif
|
||||
// https://www.json.org/img/number.png
|
||||
|
||||
// Note, we deviate from the json.org documentation as follows:
|
||||
// There is no reason to lex negative numbers as atomic tokens, it is better to parse them
|
||||
@ -465,9 +479,11 @@ func (l *lexer) lexNumber() error {
|
||||
numAfterOneToNine
|
||||
numAfterDot
|
||||
numAfterDigit
|
||||
numAfterUnderscore
|
||||
numAfterE
|
||||
numAfterExpSign
|
||||
numAfterExpDigit
|
||||
numAfterExpUnderscore
|
||||
)
|
||||
|
||||
state := numBegin
|
||||
@ -492,6 +508,9 @@ outerLoop:
|
||||
state = numAfterDot
|
||||
case 'e', 'E':
|
||||
state = numAfterE
|
||||
case '_':
|
||||
state = numAfterUnderscore
|
||||
|
||||
default:
|
||||
break outerLoop
|
||||
}
|
||||
@ -503,6 +522,8 @@ outerLoop:
|
||||
state = numAfterE
|
||||
case r >= '0' && r <= '9':
|
||||
state = numAfterOneToNine
|
||||
case r == '_':
|
||||
state = numAfterUnderscore
|
||||
default:
|
||||
break outerLoop
|
||||
}
|
||||
@ -521,9 +542,28 @@ outerLoop:
|
||||
state = numAfterE
|
||||
case r >= '0' && r <= '9':
|
||||
state = numAfterDigit
|
||||
case r == '_':
|
||||
state = numAfterUnderscore
|
||||
default:
|
||||
break outerLoop
|
||||
}
|
||||
|
||||
case numAfterUnderscore:
|
||||
// The only valid transition out of _ is to a digit.
|
||||
switch {
|
||||
case r == '_':
|
||||
return l.makeStaticErrorPoint(
|
||||
"Couldn't lex number, multiple consecutive _'s",
|
||||
l.location())
|
||||
|
||||
case r >= '0' && r <= '9':
|
||||
state = numAfterExpDigit
|
||||
|
||||
default:
|
||||
return l.makeStaticErrorPoint(
|
||||
fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)),
|
||||
l.location())
|
||||
}
|
||||
case numAfterE:
|
||||
switch {
|
||||
case r == '+' || r == '-':
|
||||
@ -545,9 +585,12 @@ outerLoop:
|
||||
}
|
||||
|
||||
case numAfterExpDigit:
|
||||
if r >= '0' && r <= '9' {
|
||||
switch {
|
||||
case r >= '0' && r <= '9':
|
||||
state = numAfterExpDigit
|
||||
} else {
|
||||
case r == '_':
|
||||
state = numAfterUnderscore
|
||||
default:
|
||||
break outerLoop
|
||||
}
|
||||
}
|
||||
@ -965,7 +1008,6 @@ func Lex(diagnosticFilename ast.DiagnosticFileName, importedFilename, input stri
|
||||
fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)),
|
||||
l.location())
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ -314,6 +314,26 @@ func TestNumber1epExc(t *testing.T) {
|
||||
SingleTest(t, "1e+!", "snippet:1:4 Couldn't lex number, junk after exponent sign: '!'", Tokens{})
|
||||
}
|
||||
|
||||
func TestNumberSeparators(t *testing.T) {
|
||||
|
||||
SingleTest(t, "123_456", "", Tokens{{kind: tokenNumber, data: "123456"}})
|
||||
|
||||
/*
|
||||
testLex("number 123_456", "123_456", {Token(Token::Kind::NUMBER, "123456")}, "");
|
||||
testLex("number 1_750_000", "1_750_000", {Token(Token::Kind::NUMBER, "1750000")}, "");
|
||||
testLex("number 1_2_3", "1_2_3", {Token(Token::Kind::NUMBER, "123")}, "");
|
||||
testLex("number 3.141_592", "3.141_592", {Token(Token::Kind::NUMBER, "3.141592")}, "");
|
||||
testLex("number 01_100", "01_100", {Token(Token::Kind::NUMBER, "0"), Token(Token::Kind::NUMBER, "1100")}, "");
|
||||
testLex("number 1_200.0", "1_200.0", {Token(Token::Kind::NUMBER, "1200.0")}, "");
|
||||
testLex("number 0e1_01", "0e1_01", {Token(Token::Kind::NUMBER, "0e101")}, "");
|
||||
testLex("number 10_10e3", "10_10e3", {Token(Token::Kind::NUMBER, "1010e3")}, "");
|
||||
testLex("number 2_3e1_2", "2_3e1_2", {Token(Token::Kind::NUMBER, "23e12")}, "");
|
||||
testLex("number 1.1_2e100", "1.1_2e100", {Token(Token::Kind::NUMBER, "1.12e100")}, "");
|
||||
testLex("number 1.1e-10_1", "1.1e-10_1", {Token(Token::Kind::NUMBER, "1.1e-101")}, "");
|
||||
testLex("number 9.109_383_56e-31", "9.109_383_56e-31", {Token(Token::Kind::NUMBER, "9.10938356e-31")}, "");
|
||||
*/
|
||||
}
|
||||
|
||||
func TestDoublestring1(t *testing.T) {
|
||||
SingleTest(t, "\"hi\"", "", Tokens{
|
||||
{kind: tokenStringDouble, data: "hi"},
|
||||
|
Loading…
Reference in New Issue
Block a user