Add verbatim string support

Stanisław Barzowski 2017-08-10 17:26:37 -04:00
parent da7c66ad55
commit a94bfef764
6 changed files with 85 additions and 12 deletions
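For orientation: the feature added here is Jsonnet's @-prefixed verbatim string literal, in which backslashes are ordinary characters and a doubled quote character stands for a single quote. Below is a minimal sketch of the user-visible behaviour, assuming the exported go-jsonnet VM API (MakeVM / EvaluateSnippet) rather than anything introduced by this commit; treat it as illustrative only.

package main

import (
    "fmt"

    jsonnet "github.com/google/go-jsonnet"
)

func main() {
    vm := jsonnet.MakeVM()
    // Inside @"...": backslashes are literal and "" collapses to a single ".
    out, err := vm.EvaluateSnippet("verbatim.jsonnet", `@"C:\temp, she said ""hi"""`)
    if err != nil {
        panic(err)
    }
    fmt.Print(out) // "C:\\temp, she said \"hi\""
}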

ast.go

@@ -348,6 +348,8 @@ const (
     astStringSingle astLiteralStringKind = iota
     astStringDouble
     astStringBlock
+    astVerbatimStringDouble
+    astVerbatimStringSingle
 )
 
 // astLiteralString represents a JSON string


@@ -324,13 +324,16 @@ func desugar(astPtr *astNode, objLevel int) (err error) {
         // Nothing to do.
     case *astLiteralString:
-        unescaped, err := stringUnescape(ast.Loc(), ast.value)
-        if err != nil {
-            return err
-        }
-        ast.value = unescaped
-        ast.kind = astStringDouble
-        ast.blockIndent = ""
+        if ast.kind != astVerbatimStringDouble && ast.kind != astVerbatimStringSingle {
+            unescaped, err := stringUnescape(ast.Loc(), ast.value)
+            if err != nil {
+                return err
+            }
+            // TODO(sbarzowski) perhaps store unescaped in a separate field...
+            ast.value = unescaped
+            ast.kind = astStringDouble
+            ast.blockIndent = ""
+        }
     case *astObject:
         // Hidden variable to allow $ binding.
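The effect of the guard above is that verbatim literals never pass through stringUnescape, while ordinary literals are unescaped and normalized to astStringDouble. A rough sketch of that difference, using the unexported names shown in the diff; it would have to live in the same package, and everything around those names is assumed, not taken from the commit.

// desugarStringExample contrasts an ordinary and a verbatim string literal.
// Illustrative only; not part of the commit.
func desugarStringExample() error {
    var verbatim astNode = &astLiteralString{value: `\n`, kind: astVerbatimStringDouble}
    var plain astNode = &astLiteralString{value: `\n`, kind: astStringDouble}

    if err := desugar(&verbatim, 0); err != nil {
        return err
    }
    if err := desugar(&plain, 0); err != nil {
        return err
    }

    // verbatim still holds the two characters `\` and `n` and keeps its kind;
    // plain now holds a real newline, with kind astStringDouble and blockIndent "".
    return nil
}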


@@ -70,6 +70,8 @@ const (
     tokenStringBlock
     tokenStringDouble
     tokenStringSingle
+    tokenVerbatimStringDouble
+    tokenVerbatimStringSingle
 
     // Keywords
     tokenAssert
@@ -109,12 +111,14 @@ var tokenKindStrings = []string{
     tokenSemicolon: "\";\"",
 
     // Arbitrary length lexemes
     tokenIdentifier: "IDENTIFIER",
     tokenNumber: "NUMBER",
     tokenOperator: "OPERATOR",
     tokenStringBlock: "STRING_BLOCK",
     tokenStringDouble: "STRING_DOUBLE",
     tokenStringSingle: "STRING_SINGLE",
+    tokenVerbatimStringDouble: "VERBATIM_STRING_DOUBLE",
+    tokenVerbatimStringSingle: "VERBATIM_STRING_SINGLE",
 
     // Keywords
     tokenAssert: "assert",
@@ -735,6 +739,45 @@ func lex(fn string, input string) (tokens, error) {
                     r = l.next()
                 }
             }
+        case '@':
+            // Verbatim string literals.
+            // ' and " quoting is interpreted here, unlike non-verbatim strings
+            // where it is done later by jsonnet_string_unescape. This is OK
+            // in this case because no information is lost by resolving the
+            // repeated quote into a single quote, so we can go back to the
+            // original form in the formatter.
+            var data []rune
+            stringStartLoc := l.prevLocation()
+            quot := l.next()
+            var kind tokenKind
+            if quot == '"' {
+                kind = tokenVerbatimStringDouble
+            } else if quot == '\'' {
+                kind = tokenVerbatimStringSingle
+            } else {
+                return nil, makeStaticErrorPoint(
+                    fmt.Sprintf("Couldn't lex verbatim string, junk after '@': %v", quot),
+                    l.fileName,
+                    stringStartLoc,
+                )
+            }
+            for r = l.next(); ; r = l.next() {
+                if r == lexEOF {
+                    return nil, makeStaticErrorPoint("Unterminated String", l.fileName, stringStartLoc)
+                } else if r == quot {
+                    if l.peek() == quot {
+                        l.next()
+                        data = append(data, r)
+                    } else {
+                        l.emitFullToken(kind, string(data), "", "")
+                        l.resetTokenStart()
+                        break
+                    }
+                } else {
+                    data = append(data, r)
+                }
+            }
         case '#':
             l.resetTokenStart() // Throw out the leading #
             for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
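The comment in the new '@' case argues that collapsing a doubled quote into a single one loses nothing, because a formatter can re-double the quotes to recover the original source text. A small illustrative helper (hypothetical name, not part of the commit) makes that round trip concrete:

// reVerbatim rebuilds the source form of a verbatim string from a token's
// data and its quote character by re-doubling every embedded quote.
func reVerbatim(data string, quote rune) string {
    out := []rune{'@', quote}
    for _, r := range data {
        out = append(out, r)
        if r == quote {
            out = append(out, r) // restore the "" that the lexer collapsed
        }
    }
    return string(append(out, quote))
}

// reVerbatim(`say "hi"`, '"') == `@"say ""hi"""`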


@@ -226,6 +226,16 @@ test
         "block string no ws:1:1 Text block's first line must start with whitespace",
     },
+    {"verbatim_string1", `@""`, tokens{{kind: tokenVerbatimStringDouble, data: ""}}, ""},
+    {"verbatim_string2", `@''`, tokens{{kind: tokenVerbatimStringSingle, data: ""}}, ""},
+    {"verbatim_string3", `@""""`, tokens{{kind: tokenVerbatimStringDouble, data: `"`}}, ""},
+    {"verbatim_string4", `@''''`, tokens{{kind: tokenVerbatimStringSingle, data: "'"}}, ""},
+    {"verbatim_string5", `@"\n"`, tokens{{kind: tokenVerbatimStringDouble, data: "\\n"}}, ""},
+    {"verbatim_string6", `@"''"`, tokens{{kind: tokenVerbatimStringDouble, data: "''"}}, ""},
+    {"verbatim_string_unterminated", `@"blah blah`, tokens{}, "verbatim_string_unterminated:1:1 Unterminated String"},
+    {"verbatim_string_junk", `@blah blah`, tokens{}, "verbatim_string_junk:1:1 Couldn't lex verbatim string, junk after '@': 98"},
     {"op *", "*", tokens{{kind: tokenOperator, data: "*"}}, ""},
     {"op /", "/", tokens{{kind: tokenOperator, data: "/"}}, ""},
     {"op %", "%", tokens{{kind: tokenOperator, data: "%"}}, ""},


@@ -40,6 +40,7 @@ var mainTests = []mainTest{
     {"simple_arith_string2", "\"aaa\" + \"\"", "\"aaa\"", ""},
     {"simple_arith_string3", "\"\" + \"bbb\"", "\"bbb\"", ""},
     {"simple_arith_string_empty", "\"\" + \"\"", "\"\"", ""},
+    {"verbatim_string", `@"blah ☺"`, `"blah ☺"`, ""},
     {"empty_array", "[]", "[ ]", ""},
     {"array", "[1, 2, 1 + 2]", "[\n 1,\n 2,\n 3\n]", ""},
     {"empty_object", "{}", "{ }", ""},


@@ -346,6 +346,8 @@ func (p *parser) parseObjectRemainder(tok *token) (astNode, *token, error) {
                 kind:        astStringBlock,
                 blockIndent: next.stringBlockIndent,
             }
+        // TODO(sbarzowski) are verbatim string literals allowed here?
+        // if so, maybe it's time we extracted string literal creation somewhere...
         default:
             kind = astObjectFieldExpr
             var err error
@@ -658,6 +660,18 @@ func (p *parser) parseTerminal() (astNode, error) {
             kind:        astStringDouble,
             blockIndent: tok.stringBlockIndent,
         }, nil
+    case tokenVerbatimStringDouble:
+        return &astLiteralString{
+            astNodeBase: astNodeBase{loc: tok.loc},
+            value:       tok.data,
+            kind:        astVerbatimStringDouble,
+        }, nil
+    case tokenVerbatimStringSingle:
+        return &astLiteralString{
+            astNodeBase: astNodeBase{loc: tok.loc},
+            value:       tok.data,
+            kind:        astVerbatimStringSingle,
+        }, nil
     case tokenFalse:
         return &astLiteralBoolean{
             astNodeBase: astNodeBase{loc: tok.loc},