Initial separator lexing.

This commit is contained in:
David Grant 2024-06-20 20:44:18 -07:00
parent 2b4d7535f5
commit ffd82ef1df
2 changed files with 67 additions and 5 deletions

View File

@ -358,7 +358,21 @@ func (l *lexer) resetTokenStart() {
l.tokenStartLoc = l.location() l.tokenStartLoc = l.location()
} }
// stripUnderscores returns s with every '_' removed. It is used to discard
// the digit separators that may appear inside a lexed number.
func stripUnderscores(s string) string {
	return strings.ReplaceAll(s, "_", "")
}

// tokenKindPostprocessors maps a token kind to a rewrite that is applied to
// the lexed token string before the token is stored in the tokens list. A
// token kind does not need to have an entry here.
var tokenKindPostprocessors = map[tokenKind]func(string) string{
	tokenNumber: stripUnderscores,
}
func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) { func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) {
// Run the postprocessor if the token kind has one defined.
if pp, ok := tokenKindPostprocessors[kind]; ok {
data = pp(data)
}
l.tokens = append(l.tokens, token{ l.tokens = append(l.tokens, token{
kind: kind, kind: kind,
fodder: l.fodder, fodder: l.fodder,
@ -451,7 +465,7 @@ func (l *lexer) lexUntilNewline() (string, int, int) {
// that the next rune to be served by the lexer will be a leading digit. // that the next rune to be served by the lexer will be a leading digit.
func (l *lexer) lexNumber() error { func (l *lexer) lexNumber() error {
// This function should be understood with reference to the linked image: // This function should be understood with reference to the linked image:
// http://www.json.org/number.gif // https://www.json.org/img/number.png
// Note, we deviate from the json.org documentation as follows: // Note, we deviate from the json.org documentation as follows:
// There is no reason to lex negative numbers as atomic tokens, it is better to parse them // There is no reason to lex negative numbers as atomic tokens, it is better to parse them
@ -465,9 +479,11 @@ func (l *lexer) lexNumber() error {
numAfterOneToNine numAfterOneToNine
numAfterDot numAfterDot
numAfterDigit numAfterDigit
numAfterUnderscore
numAfterE numAfterE
numAfterExpSign numAfterExpSign
numAfterExpDigit numAfterExpDigit
numAfterExpUnderscore
) )
state := numBegin state := numBegin
@ -492,6 +508,9 @@ outerLoop:
state = numAfterDot state = numAfterDot
case 'e', 'E': case 'e', 'E':
state = numAfterE state = numAfterE
case '_':
state = numAfterUnderscore
default: default:
break outerLoop break outerLoop
} }
@ -503,6 +522,8 @@ outerLoop:
state = numAfterE state = numAfterE
case r >= '0' && r <= '9': case r >= '0' && r <= '9':
state = numAfterOneToNine state = numAfterOneToNine
case r == '_':
state = numAfterUnderscore
default: default:
break outerLoop break outerLoop
} }
@ -521,9 +542,28 @@ outerLoop:
state = numAfterE state = numAfterE
case r >= '0' && r <= '9': case r >= '0' && r <= '9':
state = numAfterDigit state = numAfterDigit
case r == '_':
state = numAfterUnderscore
default: default:
break outerLoop break outerLoop
} }
case numAfterUnderscore:
// The only valid transition out of _ is to a digit.
switch {
case r == '_':
return l.makeStaticErrorPoint(
"Couldn't lex number, multiple consecutive _'s",
l.location())
case r >= '0' && r <= '9':
state = numAfterExpDigit
default:
return l.makeStaticErrorPoint(
fmt.Sprintf("Couldn't lex number, junk after '_': %v", strconv.QuoteRuneToASCII(r)),
l.location())
}
case numAfterE: case numAfterE:
switch { switch {
case r == '+' || r == '-': case r == '+' || r == '-':
@ -545,9 +585,12 @@ outerLoop:
} }
case numAfterExpDigit: case numAfterExpDigit:
if r >= '0' && r <= '9' { switch {
case r >= '0' && r <= '9':
state = numAfterExpDigit state = numAfterExpDigit
} else { case r == '_':
state = numAfterUnderscore
default:
break outerLoop break outerLoop
} }
} }
@ -965,7 +1008,6 @@ func Lex(diagnosticFilename ast.DiagnosticFileName, importedFilename, input stri
fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)), fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)),
l.location()) l.location())
} }
} }
} }

View File

@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
@ -314,6 +314,26 @@ func TestNumber1epExc(t *testing.T) {
SingleTest(t, "1e+!", "snippet:1:4 Couldn't lex number, junk after exponent sign: '!'", Tokens{}) SingleTest(t, "1e+!", "snippet:1:4 Couldn't lex number, junk after exponent sign: '!'", Tokens{})
} }
func TestNumberSeparators(t *testing.T) {
SingleTest(t, "123_456", "", Tokens{{kind: tokenNumber, data: "123456"}})
/*
testLex("number 123_456", "123_456", {Token(Token::Kind::NUMBER, "123456")}, "");
testLex("number 1_750_000", "1_750_000", {Token(Token::Kind::NUMBER, "1750000")}, "");
testLex("number 1_2_3", "1_2_3", {Token(Token::Kind::NUMBER, "123")}, "");
testLex("number 3.141_592", "3.141_592", {Token(Token::Kind::NUMBER, "3.141592")}, "");
testLex("number 01_100", "01_100", {Token(Token::Kind::NUMBER, "0"), Token(Token::Kind::NUMBER, "1100")}, "");
testLex("number 1_200.0", "1_200.0", {Token(Token::Kind::NUMBER, "1200.0")}, "");
testLex("number 0e1_01", "0e1_01", {Token(Token::Kind::NUMBER, "0e101")}, "");
testLex("number 10_10e3", "10_10e3", {Token(Token::Kind::NUMBER, "1010e3")}, "");
testLex("number 2_3e1_2", "2_3e1_2", {Token(Token::Kind::NUMBER, "23e12")}, "");
testLex("number 1.1_2e100", "1.1_2e100", {Token(Token::Kind::NUMBER, "1.12e100")}, "");
testLex("number 1.1e-10_1", "1.1e-10_1", {Token(Token::Kind::NUMBER, "1.1e-101")}, "");
testLex("number 9.109_383_56e-31", "9.109_383_56e-31", {Token(Token::Kind::NUMBER, "9.10938356e-31")}, "");
*/
}
func TestDoublestring1(t *testing.T) { func TestDoublestring1(t *testing.T) {
SingleTest(t, "\"hi\"", "", Tokens{ SingleTest(t, "\"hi\"", "", Tokens{
{kind: tokenStringDouble, data: "hi"}, {kind: tokenStringDouble, data: "hi"},