mirror of
https://github.com/google/go-jsonnet.git
synced 2025-08-13 09:47:13 +02:00
Initial separator lexing.
This commit is contained in:
parent
2b4d7535f5
commit
ffd82ef1df
@ -358,7 +358,21 @@ func (l *lexer) resetTokenStart() {
|
|||||||
l.tokenStartLoc = l.location()
|
l.tokenStartLoc = l.location()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tokenKindPostprocessors defines a transformation of the lexed token string
|
||||||
|
// before it is stored in the tokens list. It is optional for each token kind.
|
||||||
|
var tokenKindPostprocessors = map[tokenKind]func(string) string{
|
||||||
|
tokenNumber: func(s string) string {
|
||||||
|
// Get rid of underscore digit separators.
|
||||||
|
return strings.ReplaceAll(s, "_", "")
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) {
|
func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) {
|
||||||
|
// Run the postprocessor if the token kind has one defined.
|
||||||
|
if pp, ok := tokenKindPostprocessors[kind]; ok {
|
||||||
|
data = pp(data)
|
||||||
|
}
|
||||||
|
|
||||||
l.tokens = append(l.tokens, token{
|
l.tokens = append(l.tokens, token{
|
||||||
kind: kind,
|
kind: kind,
|
||||||
fodder: l.fodder,
|
fodder: l.fodder,
|
||||||
@ -451,7 +465,7 @@ func (l *lexer) lexUntilNewline() (string, int, int) {
|
|||||||
// that the next rune to be served by the lexer will be a leading digit.
|
// that the next rune to be served by the lexer will be a leading digit.
|
||||||
func (l *lexer) lexNumber() error {
|
func (l *lexer) lexNumber() error {
|
||||||
// This function should be understood with reference to the linked image:
|
// This function should be understood with reference to the linked image:
|
||||||
// http://www.json.org/number.gif
|
// https://www.json.org/img/number.png
|
||||||
|
|
||||||
// Note, we deviate from the json.org documentation as follows:
|
// Note, we deviate from the json.org documentation as follows:
|
||||||
// There is no reason to lex negative numbers as atomic tokens, it is better to parse them
|
// There is no reason to lex negative numbers as atomic tokens, it is better to parse them
|
||||||
@ -465,9 +479,11 @@ func (l *lexer) lexNumber() error {
|
|||||||
numAfterOneToNine
|
numAfterOneToNine
|
||||||
numAfterDot
|
numAfterDot
|
||||||
numAfterDigit
|
numAfterDigit
|
||||||
|
numAfterUnderscore
|
||||||
numAfterE
|
numAfterE
|
||||||
numAfterExpSign
|
numAfterExpSign
|
||||||
numAfterExpDigit
|
numAfterExpDigit
|
||||||
|
numAfterExpUnderscore
|
||||||
)
|
)
|
||||||
|
|
||||||
state := numBegin
|
state := numBegin
|
||||||
@ -492,6 +508,9 @@ outerLoop:
|
|||||||
state = numAfterDot
|
state = numAfterDot
|
||||||
case 'e', 'E':
|
case 'e', 'E':
|
||||||
state = numAfterE
|
state = numAfterE
|
||||||
|
case '_':
|
||||||
|
state = numAfterUnderscore
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break outerLoop
|
break outerLoop
|
||||||
}
|
}
|
||||||
@ -503,6 +522,8 @@ outerLoop:
|
|||||||
state = numAfterE
|
state = numAfterE
|
||||||
case r >= '0' && r <= '9':
|
case r >= '0' && r <= '9':
|
||||||
state = numAfterOneToNine
|
state = numAfterOneToNine
|
||||||
|
case r == '_':
|
||||||
|
state = numAfterUnderscore
|
||||||
default:
|
default:
|
||||||
break outerLoop
|
break outerLoop
|
||||||
}
|
}
|
||||||
@ -521,9 +542,28 @@ outerLoop:
|
|||||||
state = numAfterE
|
state = numAfterE
|
||||||
case r >= '0' && r <= '9':
|
case r >= '0' && r <= '9':
|
||||||
state = numAfterDigit
|
state = numAfterDigit
|
||||||
|
case r == '_':
|
||||||
|
state = numAfterUnderscore
|
||||||
default:
|
default:
|
||||||
break outerLoop
|
break outerLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case numAfterUnderscore:
|
||||||
|
// The only valid transition out of _ is to a digit.
|
||||||
|
switch {
|
||||||
|
case r == '_':
|
||||||
|
return l.makeStaticErrorPoint(
|
||||||
|
"Couldn't lex number, multiple consecutive _'s",
|
||||||
|
l.location())
|
||||||
|
|
||||||
|
case r >= '0' && r <= '9':
|
||||||
|
state = numAfterExpDigit
|
||||||
|
|
||||||
|
default:
|
||||||
|
return l.makeStaticErrorPoint(
|
||||||
|
fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)),
|
||||||
|
l.location())
|
||||||
|
}
|
||||||
case numAfterE:
|
case numAfterE:
|
||||||
switch {
|
switch {
|
||||||
case r == '+' || r == '-':
|
case r == '+' || r == '-':
|
||||||
@ -545,9 +585,12 @@ outerLoop:
|
|||||||
}
|
}
|
||||||
|
|
||||||
case numAfterExpDigit:
|
case numAfterExpDigit:
|
||||||
if r >= '0' && r <= '9' {
|
switch {
|
||||||
|
case r >= '0' && r <= '9':
|
||||||
state = numAfterExpDigit
|
state = numAfterExpDigit
|
||||||
} else {
|
case r == '_':
|
||||||
|
state = numAfterUnderscore
|
||||||
|
default:
|
||||||
break outerLoop
|
break outerLoop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -965,7 +1008,6 @@ func Lex(diagnosticFilename ast.DiagnosticFileName, importedFilename, input stri
|
|||||||
fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)),
|
fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)),
|
||||||
l.location())
|
l.location())
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,6 +314,26 @@ func TestNumber1epExc(t *testing.T) {
|
|||||||
SingleTest(t, "1e+!", "snippet:1:4 Couldn't lex number, junk after exponent sign: '!'", Tokens{})
|
SingleTest(t, "1e+!", "snippet:1:4 Couldn't lex number, junk after exponent sign: '!'", Tokens{})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNumberSeparators(t *testing.T) {
|
||||||
|
|
||||||
|
SingleTest(t, "123_456", "", Tokens{{kind: tokenNumber, data: "123456"}})
|
||||||
|
|
||||||
|
/*
|
||||||
|
testLex("number 123_456", "123_456", {Token(Token::Kind::NUMBER, "123456")}, "");
|
||||||
|
testLex("number 1_750_000", "1_750_000", {Token(Token::Kind::NUMBER, "1750000")}, "");
|
||||||
|
testLex("number 1_2_3", "1_2_3", {Token(Token::Kind::NUMBER, "123")}, "");
|
||||||
|
testLex("number 3.141_592", "3.141_592", {Token(Token::Kind::NUMBER, "3.141592")}, "");
|
||||||
|
testLex("number 01_100", "01_100", {Token(Token::Kind::NUMBER, "0"), Token(Token::Kind::NUMBER, "1100")}, "");
|
||||||
|
testLex("number 1_200.0", "1_200.0", {Token(Token::Kind::NUMBER, "1200.0")}, "");
|
||||||
|
testLex("number 0e1_01", "0e1_01", {Token(Token::Kind::NUMBER, "0e101")}, "");
|
||||||
|
testLex("number 10_10e3", "10_10e3", {Token(Token::Kind::NUMBER, "1010e3")}, "");
|
||||||
|
testLex("number 2_3e1_2", "2_3e1_2", {Token(Token::Kind::NUMBER, "23e12")}, "");
|
||||||
|
testLex("number 1.1_2e100", "1.1_2e100", {Token(Token::Kind::NUMBER, "1.12e100")}, "");
|
||||||
|
testLex("number 1.1e-10_1", "1.1e-10_1", {Token(Token::Kind::NUMBER, "1.1e-101")}, "");
|
||||||
|
testLex("number 9.109_383_56e-31", "9.109_383_56e-31", {Token(Token::Kind::NUMBER, "9.10938356e-31")}, "");
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
func TestDoublestring1(t *testing.T) {
|
func TestDoublestring1(t *testing.T) {
|
||||||
SingleTest(t, "\"hi\"", "", Tokens{
|
SingleTest(t, "\"hi\"", "", Tokens{
|
||||||
{kind: tokenStringDouble, data: "hi"},
|
{kind: tokenStringDouble, data: "hi"},
|
||||||
|
Loading…
Reference in New Issue
Block a user