Initial commit with lexer
commit a04a6cf2e3
LICENSE (new file, 202 lines)
@@ -0,0 +1,202 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# go-jsonnet

This is a port of [jsonnet](http://jsonnet.org/) to Go. It is very much a work in progress.

This implementation is largely based on the [jsonnet C++ implementation](https://github.com/google/jsonnet).

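For orientation, here is a minimal sketch of driving the lexer this commit adds. `lex` and the token types are unexported, so a snippet like this would have to live inside the `jsonnet` package itself; the file name and input are invented for illustration.

package jsonnet

import "fmt"

// exampleLex is a hypothetical helper showing the lex entry point: it
// returns a flat token slice (terminated by a tokenEndOfFile token) or a
// StaticError carrying file:line:column information.
func exampleLex() {
	toks, err := lex("example.jsonnet", "local x = 1.5; x + 2")
	if err != nil {
		fmt.Println("lex error:", err)
		return
	}
	for _, t := range toks {
		fmt.Printf("kind=%d data=%q\n", t.kind, t.data)
	}
}
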
lexer.go (new file, 681 lines)
@@ -0,0 +1,681 @@
package jsonnet

import (
	"bytes"
	"fmt"
	"strconv"
	"strings"
	"unicode/utf8"
)

//////////////////////////////////////////////////////////////////////////////
// Fodder
//
// Fodder is stuff that is usually thrown away by lexers/preprocessors but is
// kept so that the source can be round tripped with full fidelity.
type fodderKind int

const (
	fodderWhitespace fodderKind = iota
	fodderCommentC
	fodderCommentCpp
	fodderCommentHash
)

type fodderElement struct {
	kind fodderKind
	data string
}

type fodder []fodderElement

//////////////////////////////////////////////////////////////////////////////
// Token

type tokenKind int

const (
	tokenInvalid tokenKind = iota

	// Symbols
	tokenBraceL
	tokenBraceR
	tokenBracketL
	tokenBracketR
	tokenColon
	tokenComma
	tokenDollar
	tokenDot
	tokenParenL
	tokenParenR
	tokenSemicolon

	// Arbitrary length lexemes
	tokenIdentifier
	tokenNumber
	tokenOperator
	tokenStringDouble
	tokenStringSingle
	tokenStringBlock

	// Keywords
	tokenAssert
	tokenElse
	tokenError
	tokenFalse
	tokenFor
	tokenFunction
	tokenIf
	tokenImport
	tokenImportStr
	tokenIn
	tokenLocal
	tokenNullLit
	tokenTailStrict
	tokenThen
	tokenSelf
	tokenSuper
	tokenTrue

	// A special token that holds line/column information about the end of the
	// file.
	tokenEndOfFile
)

type token struct {
	kind   tokenKind // The type of the token
	fodder fodder    // Any fodder that occurs before this token
	data   string    // Content of the token if it is not a keyword

	// Extra info for when kind == tokenStringBlock
	stringBlockIndent     string // The sequence of whitespace that indented the block.
	stringBlockTermIndent string // This is always fewer whitespace characters than in stringBlockIndent.

	loc LocationRange
}

type tokens []token

//////////////////////////////////////////////////////////////////////////////
// Helpers

func isUpper(r rune) bool {
	return r >= 'A' && r <= 'Z'
}

func isLower(r rune) bool {
	return r >= 'a' && r <= 'z'
}

func isNumber(r rune) bool {
	return r >= '0' && r <= '9'
}

func isIdentifierFirst(r rune) bool {
	return isUpper(r) || isLower(r) || r == '_'
}

func isIdentifier(r rune) bool {
	return isIdentifierFirst(r) || isNumber(r)
}

func isSymbol(r rune) bool {
	switch r {
	case '&', '|', '^', '=', '<', '>', '*', '/', '%', '#':
		return true
	}
	return false
}

// checkWhitespace checks that b has at least the same whitespace prefix as a
// and returns the length of that prefix. Otherwise it returns 0, including
// when a has no whitespace prefix at all.
func checkWhitespace(a, b string) int {
	i := 0
	for ; i < len(a); i++ {
		if a[i] != ' ' && a[i] != '\t' {
			// a has run out of whitespace and b matched up to this point. Return
			// result.
			return i
		}
		if i >= len(b) {
			// We ran off the edge of b while a still has whitespace. Return 0 as
			// failure.
			return 0
		}
		if a[i] != b[i] {
			// a has whitespace but b does not. Return 0 as failure.
			return 0
		}
	}
	// We ran off the end of a and b kept up
	return i
}

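// Worked examples of the above: checkWhitespace("  ", "  foo") == 2 (b
// repeats a's two-space prefix), checkWhitespace("\t", "  foo") == 0 (tab
// vs. space mismatch), and checkWhitespace("xy", "xy") == 0 (a has no
// whitespace prefix at all).
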
//////////////////////////////////////////////////////////////////////////////
// Lexer

type lexer struct {
	fileName string // The file name being lexed, only used for errors
	input    string // The input string

	pos        int // Current byte position in input
	lineNumber int // Current line number for pos
	lineStart  int // Byte position of start of line

	// Data about the position of the lexer before the previous call to
	// 'next'. If this state is lost then prevPos is set to lexEOF and panic
	// ensues.
	prevPos        int // Byte position of last rune read
	prevLineNumber int // The line number before last rune read
	prevLineStart  int // The line start before last rune read

	tokens tokens // The tokens that we've generated so far

	// Information about the token we are working on right now
	fodder        fodder
	tokenStart    int
	tokenStartLoc Location
}

const lexEOF = -1

func makeLexer(fn string, input string) *lexer {
	return &lexer{
		fileName:       fn,
		input:          input,
		lineNumber:     1,
		prevPos:        lexEOF,
		prevLineNumber: 1,
		tokenStartLoc:  Location{Line: 1, Column: 1},
	}
}

// next returns the next rune in the input.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.prevPos = l.pos
		return lexEOF
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.prevPos = l.pos
	l.pos += w
	if r == '\n' {
		l.prevLineNumber = l.lineNumber
		l.prevLineStart = l.lineStart
		l.lineNumber++
		l.lineStart = l.pos
	}
	return r
}

func (l *lexer) acceptN(n int) {
	for i := 0; i < n; i++ {
		l.next()
	}
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	if l.prevPos == lexEOF {
		panic("backup called with no valid previous rune")
	}
	l.lineNumber = l.prevLineNumber
	l.lineStart = l.prevLineStart
	l.pos = l.prevPos
	l.prevPos = lexEOF
}

func (l *lexer) location() Location {
	return Location{Line: l.lineNumber, Column: l.pos - l.lineStart + 1}
}

func (l *lexer) prevLocation() Location {
	if l.prevPos == lexEOF {
		panic("prevLocation called with no valid previous rune")
	}
	return Location{Line: l.prevLineNumber, Column: l.prevPos - l.prevLineStart + 1}
}

// Reset the current working token start to the current cursor position. This
// may throw away some characters. This does not throw away any accumulated
// fodder.
func (l *lexer) resetTokenStart() {
	l.tokenStart = l.pos
	l.tokenStartLoc = l.location()
}

func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) {
	l.tokens = append(l.tokens, token{
		kind:                  kind,
		fodder:                l.fodder,
		data:                  data,
		stringBlockIndent:     stringBlockIndent,
		stringBlockTermIndent: stringBlockTermIndent,
		loc:                   makeLocationRange(l.fileName, l.tokenStartLoc, l.location()),
	})
	l.fodder = fodder{}
}

func (l *lexer) emitToken(kind tokenKind) {
	l.emitFullToken(kind, l.input[l.tokenStart:l.pos], "", "")
	l.resetTokenStart()
}

func (l *lexer) addWhitespaceFodder() {
	fodderData := l.input[l.tokenStart:l.pos]
	if len(l.fodder) == 0 || l.fodder[len(l.fodder)-1].kind != fodderWhitespace {
		l.fodder = append(l.fodder, fodderElement{kind: fodderWhitespace, data: fodderData})
	} else {
		l.fodder[len(l.fodder)-1].data += fodderData
	}
	l.resetTokenStart()
}

func (l *lexer) addCommentFodder(kind fodderKind) {
	fodderData := l.input[l.tokenStart:l.pos]
	l.fodder = append(l.fodder, fodderElement{kind: kind, data: fodderData})
	l.resetTokenStart()
}

func (l *lexer) addFodder(kind fodderKind, data string) {
	l.fodder = append(l.fodder, fodderElement{kind: kind, data: data})
}

// lexNumber will consume a number and emit a token. It is assumed
// that the next rune to be served by the lexer will be a leading digit.
func (l *lexer) lexNumber() error {
	// This function should be understood with reference to the linked image:
	// http://www.json.org/number.gif

	// Note, we deviate from the json.org documentation as follows:
	// There is no reason to lex negative numbers as atomic tokens, it is better to parse them
	// as a unary operator combined with a numeric literal. This avoids x-1 being tokenized as
	// <identifier> <number> instead of the intended <identifier> <binop> <number>.

	type numLexState int
	const (
		numBegin numLexState = iota
		numAfterZero
		numAfterOneToNine
		numAfterDot
		numAfterDigit
		numAfterE
		numAfterExpSign
		numAfterExpDigit
	)

	state := numBegin
	for {
		r := l.next()
		switch state {
		case numBegin:
			switch {
			case r == '0':
				state = numAfterZero
			case r >= '1' && r <= '9':
				state = numAfterOneToNine
			default:
				return makeStaticErrorPoint(
					"Couldn't lex number", l.fileName, l.prevLocation())
			}
		case numAfterZero:
			switch r {
			case '.':
				state = numAfterDot
			case 'e', 'E':
				state = numAfterE
			default:
				goto end
			}
		case numAfterOneToNine:
			switch {
			case r == '.':
				state = numAfterDot
			case r == 'e' || r == 'E':
				state = numAfterE
			case r >= '0' && r <= '9':
				state = numAfterOneToNine
			default:
				goto end
			}
		case numAfterDot:
			switch {
			case r >= '0' && r <= '9':
				state = numAfterDigit
			default:
				return makeStaticErrorPoint(
					fmt.Sprintf("Couldn't lex number, junk after decimal point: %v", strconv.QuoteRuneToASCII(r)),
					l.fileName, l.prevLocation())
			}
		case numAfterDigit:
			switch {
			case r == 'e' || r == 'E':
				state = numAfterE
			case r >= '0' && r <= '9':
				state = numAfterDigit
			default:
				goto end
			}
		case numAfterE:
			switch {
			case r == '+' || r == '-':
				state = numAfterExpSign
			case r >= '0' && r <= '9':
				state = numAfterExpDigit
			default:
				return makeStaticErrorPoint(
					fmt.Sprintf("Couldn't lex number, junk after 'E': %v", strconv.QuoteRuneToASCII(r)),
					l.fileName, l.prevLocation())
			}
		case numAfterExpSign:
			if r >= '0' && r <= '9' {
				state = numAfterExpDigit
			} else {
				return makeStaticErrorPoint(
					fmt.Sprintf("Couldn't lex number, junk after exponent sign: %v", strconv.QuoteRuneToASCII(r)),
					l.fileName, l.prevLocation())
			}

		case numAfterExpDigit:
			if r >= '0' && r <= '9' {
				state = numAfterExpDigit
			} else {
				goto end
			}
		}
	}
end:
	l.backup()
	l.emitToken(tokenNumber)
	return nil
}

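// For example, lexing "1.5e-3" walks the states: numBegin -'1'->
// numAfterOneToNine -'.'-> numAfterDot -'5'-> numAfterDigit -'e'->
// numAfterE -'-'-> numAfterExpSign -'3'-> numAfterExpDigit, and EOF then
// falls through to end, emitting a single tokenNumber "1.5e-3".
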
// lexIdentifier will consume an identifier and emit a token. It is assumed
// that the next rune to be served by the lexer will be a valid first rune of
// an identifier (a letter or underscore). This may emit a keyword or an
// identifier.
func (l *lexer) lexIdentifier() {
	r := l.next()
	if !isIdentifierFirst(r) {
		panic("Unexpected character in lexIdentifier")
	}
	for ; r != lexEOF; r = l.next() {
		if !isIdentifier(r) {
			break
		}
	}
	l.backup()

	switch l.input[l.tokenStart:l.pos] {
	case "assert":
		l.emitToken(tokenAssert)
	case "else":
		l.emitToken(tokenElse)
	case "error":
		l.emitToken(tokenError)
	case "false":
		l.emitToken(tokenFalse)
	case "for":
		l.emitToken(tokenFor)
	case "function":
		l.emitToken(tokenFunction)
	case "if":
		l.emitToken(tokenIf)
	case "import":
		l.emitToken(tokenImport)
	case "importstr":
		l.emitToken(tokenImportStr)
	case "in":
		l.emitToken(tokenIn)
	case "local":
		l.emitToken(tokenLocal)
	case "null":
		l.emitToken(tokenNullLit)
	case "self":
		l.emitToken(tokenSelf)
	case "super":
		l.emitToken(tokenSuper)
	case "tailstrict":
		l.emitToken(tokenTailStrict)
	case "then":
		l.emitToken(tokenThen)
	case "true":
		l.emitToken(tokenTrue)
	default:
		// Not a keyword, assume it is an identifier
		l.emitToken(tokenIdentifier)
	}
}

// lexSymbol will lex a token that starts with a symbol. This could be a
// comment, a text block, or an operator. This function assumes that the next
// rune to be served by the lexer will be the first rune of the new token.
func (l *lexer) lexSymbol() error {
	r := l.next()

	// Single line C++ style comment
	if r == '/' && l.peek() == '/' {
		l.next()
		l.resetTokenStart() // Throw out the leading //
		for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
		}
		// Leave the '\n' in the lexer to be fodder for the next round
		l.backup()
		l.addCommentFodder(fodderCommentCpp)
		return nil
	}

	if r == '#' {
		l.resetTokenStart() // Throw out the leading #
		for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
		}
		// Leave the '\n' in the lexer to be fodder for the next round
		l.backup()
		l.addCommentFodder(fodderCommentHash)
		return nil
	}

	if r == '/' && l.peek() == '*' {
		commentStartLoc := l.tokenStartLoc
		l.next()            // consume the '*'
		l.resetTokenStart() // Throw out the leading /*
		for r = l.next(); ; r = l.next() {
			if r == lexEOF {
				return makeStaticErrorPoint("Multi-line comment has no terminating */.",
					l.fileName, commentStartLoc)
			}
			if r == '*' && l.peek() == '/' {
				commentData := l.input[l.tokenStart : l.pos-1] // Don't include trailing */
				l.addFodder(fodderCommentC, commentData)
				l.next()            // Skip past '/'
				l.resetTokenStart() // Start next token at this point
				return nil
			}
		}
	}

	if r == '|' && strings.HasPrefix(l.input[l.pos:], "||\n") {
		commentStartLoc := l.tokenStartLoc
		l.acceptN(3) // Skip "||\n"
		var cb bytes.Buffer

		// Skip leading blank lines
		for r = l.next(); r == '\n'; r = l.next() {
			cb.WriteRune(r)
		}
		l.backup()
		// The first line of the block establishes the indent: measure its
		// leading whitespace by comparing the line against itself.
		numWhiteSpace := checkWhitespace(l.input[l.pos:], l.input[l.pos:])
		stringBlockIndent := l.input[l.pos : l.pos+numWhiteSpace]
		if numWhiteSpace == 0 {
			return makeStaticErrorPoint("Text block's first line must start with whitespace.",
				l.fileName, commentStartLoc)
		}

		for {
			if numWhiteSpace <= 0 {
				panic("Unexpected value for numWhiteSpace")
			}
			l.acceptN(numWhiteSpace)
			for r = l.next(); r != '\n'; r = l.next() {
				if r == lexEOF {
					return makeStaticErrorPoint("Unexpected EOF",
						l.fileName, commentStartLoc)
				}
				cb.WriteRune(r)
			}
			cb.WriteRune('\n')

			// Skip any blank lines
			for r = l.next(); r == '\n'; r = l.next() {
				cb.WriteRune(r)
			}
			l.backup()

			// Look at the next line
			numWhiteSpace = checkWhitespace(stringBlockIndent, l.input[l.pos:])
			if numWhiteSpace == 0 {
				// End of the text block
				var stringBlockTermIndent string
				for r = l.next(); r == ' ' || r == '\t'; r = l.next() {
					stringBlockTermIndent += string(r)
				}
				l.backup()
				if !strings.HasPrefix(l.input[l.pos:], "|||") {
					return makeStaticErrorPoint("Text block not terminated with |||",
						l.fileName, commentStartLoc)
				}
				l.acceptN(3) // Skip '|||'
				l.emitFullToken(tokenStringBlock, cb.String(),
					stringBlockIndent, stringBlockTermIndent)
				l.resetTokenStart()
				return nil
			}
		}
	}

	// Assume any string of symbols is a single operator.
	for r = l.next(); isSymbol(r); r = l.next() {
	}
	l.backup()
	l.emitToken(tokenOperator)
	return nil
}

func lex(fn string, input string) (tokens, error) {
	l := makeLexer(fn, input)

	var err error

	for r := l.next(); r != lexEOF; r = l.next() {
		switch r {
		case ' ', '\t', '\r', '\n':
			l.addWhitespaceFodder()
			continue
		case '{':
			l.emitToken(tokenBraceL)
		case '}':
			l.emitToken(tokenBraceR)
		case '[':
			l.emitToken(tokenBracketL)
		case ']':
			l.emitToken(tokenBracketR)
		case ':':
			l.emitToken(tokenColon)
		case ',':
			l.emitToken(tokenComma)
		case '$':
			l.emitToken(tokenDollar)
		case '.':
			l.emitToken(tokenDot)
		case '(':
			l.emitToken(tokenParenL)
		case ')':
			l.emitToken(tokenParenR)
		case ';':
			l.emitToken(tokenSemicolon)

		// Operators
		case '!':
			if l.peek() == '=' {
				_ = l.next()
			}
			l.emitToken(tokenOperator)
		case '~', '+', '-':
			l.emitToken(tokenOperator)

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			l.backup()
			err = l.lexNumber()
			if err != nil {
				return nil, err
			}

		// String literals
		case '"':
			stringStartLoc := l.prevLocation()
			l.resetTokenStart() // Don't include the quotes in the token data
			for r = l.next(); ; r = l.next() {
				if r == lexEOF {
					return nil, makeStaticErrorPoint("Unterminated String", l.fileName, stringStartLoc)
				}
				if r == '"' {
					l.backup()
					l.emitToken(tokenStringDouble)
					_ = l.next()
					l.resetTokenStart()
					break
				}
				if r == '\\' && l.peek() != lexEOF {
					r = l.next()
				}
			}
		case '\'':
			stringStartLoc := l.prevLocation()
			l.resetTokenStart() // Don't include the quotes in the token data
			for r = l.next(); ; r = l.next() {
				if r == lexEOF {
					return nil, makeStaticErrorPoint("Unterminated String", l.fileName, stringStartLoc)
				}
				if r == '\'' {
					l.backup()
					l.emitToken(tokenStringSingle)
					_ = l.next()
					l.resetTokenStart()
					break
				}
				if r == '\\' && l.peek() != lexEOF {
					r = l.next()
				}
			}
		default:
			if isIdentifierFirst(r) {
				l.backup()
				l.lexIdentifier()
			} else if isSymbol(r) {
				l.backup()
				err = l.lexSymbol()
				if err != nil {
					return nil, err
				}
			} else {
				return nil, makeStaticErrorPoint(
					fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)),
					l.fileName, l.prevLocation())
			}
		}
	}

	// We are currently at the EOF. Emit a special token to capture any
	// trailing fodder
	l.emitToken(tokenEndOfFile)
	return l.tokens, nil
}

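The `|||` text block branch is the subtlest part of lexSymbol: the whitespace prefix of the block's first line becomes stringBlockIndent, every later line must repeat at least that prefix (which is stripped from the captured data), and the indent found before the closing `|||` is recorded as stringBlockTermIndent. A small sketch of the resulting token, again assuming package-internal access; the literal is invented for illustration.

package jsonnet

import "fmt"

// exampleTextBlock lexes a two-line ||| block and inspects the indent
// metadata stored on the resulting tokenStringBlock token.
func exampleTextBlock() {
	toks, err := lex("block.jsonnet", "|||\n  hello\n  world\n|||")
	if err != nil {
		panic(err)
	}
	t := toks[0]
	fmt.Printf("data=%q indent=%q termIndent=%q\n",
		t.data, t.stringBlockIndent, t.stringBlockTermIndent)
	// By the rules above: data="hello\nworld\n", indent="  ", termIndent="".
}
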
lexer_test.go (new file, 255 lines)
@@ -0,0 +1,255 @@
package jsonnet

import (
	"testing"
)

type lexTest struct {
	name      string
	input     string
	tokens    tokens
	errString string
}

var (
	tEOF = token{kind: tokenEndOfFile}
)

var lexTests = []lexTest{
	{"empty", "", tokens{}, ""},
	{"whitespace", " \t\n\r\r\n", tokens{}, ""},

	{"brace L", "{", tokens{{kind: tokenBraceL, data: "{"}}, ""},
	{"brace R", "}", tokens{{kind: tokenBraceR, data: "}"}}, ""},
	{"bracket L", "[", tokens{{kind: tokenBracketL, data: "["}}, ""},
	{"bracket R", "]", tokens{{kind: tokenBracketR, data: "]"}}, ""},
	{"colon", ":", tokens{{kind: tokenColon, data: ":"}}, ""},
	{"comma", ",", tokens{{kind: tokenComma, data: ","}}, ""},
	{"dollar", "$", tokens{{kind: tokenDollar, data: "$"}}, ""},
	{"dot", ".", tokens{{kind: tokenDot, data: "."}}, ""},
	{"paren L", "(", tokens{{kind: tokenParenL, data: "("}}, ""},
	{"paren R", ")", tokens{{kind: tokenParenR, data: ")"}}, ""},
	{"semicolon", ";", tokens{{kind: tokenSemicolon, data: ";"}}, ""},

	{"not 1", "!", tokens{{kind: tokenOperator, data: "!"}}, ""},
	{"not 2", "! ", tokens{{kind: tokenOperator, data: "!"}}, ""},
	{"not equal", "!=", tokens{{kind: tokenOperator, data: "!="}}, ""},
	{"tilde", "~", tokens{{kind: tokenOperator, data: "~"}}, ""},
	{"plus", "+", tokens{{kind: tokenOperator, data: "+"}}, ""},
	{"minus", "-", tokens{{kind: tokenOperator, data: "-"}}, ""},

	{"number 0", "0", tokens{{kind: tokenNumber, data: "0"}}, ""},
	{"number 1", "1", tokens{{kind: tokenNumber, data: "1"}}, ""},
	{"number 1.0", "1.0", tokens{{kind: tokenNumber, data: "1.0"}}, ""},
	{"number 0.1", "0.1", tokens{{kind: tokenNumber, data: "0.1"}}, ""},
	{"number 0e100", "0e100", tokens{{kind: tokenNumber, data: "0e100"}}, ""},
	{"number 1e100", "1e100", tokens{{kind: tokenNumber, data: "1e100"}}, ""},
	{"number 1.1e100", "1.1e100", tokens{{kind: tokenNumber, data: "1.1e100"}}, ""},
	{"number 1.1e-100", "1.1e-100", tokens{{kind: tokenNumber, data: "1.1e-100"}}, ""},
	{"number 1.1e+100", "1.1e+100", tokens{{kind: tokenNumber, data: "1.1e+100"}}, ""},
	{"number 0100", "0100", tokens{
		{kind: tokenNumber, data: "0"},
		{kind: tokenNumber, data: "100"},
	}, ""},
	{"number 10+10", "10+10", tokens{
		{kind: tokenNumber, data: "10"},
		{kind: tokenOperator, data: "+"},
		{kind: tokenNumber, data: "10"},
	}, ""},
	{"number 1.+3", "1.+3", tokens{}, "number 1.+3:1:3 Couldn't lex number, junk after decimal point: '+'"},
	{"number 1e!", "1e!", tokens{}, "number 1e!:1:3 Couldn't lex number, junk after 'E': '!'"},
	{"number 1e+!", "1e+!", tokens{}, "number 1e+!:1:4 Couldn't lex number, junk after exponent sign: '!'"},

	{"double string \"hi\"", "\"hi\"", tokens{{kind: tokenStringDouble, data: "hi"}}, ""},
	{"double string \"hi nl\"", "\"hi\n\"", tokens{{kind: tokenStringDouble, data: "hi\n"}}, ""},
	{"double string \"hi\\\"\"", "\"hi\\\"\"", tokens{{kind: tokenStringDouble, data: "hi\\\""}}, ""},
	{"double string \"hi\\nl\"", "\"hi\\\n\"", tokens{{kind: tokenStringDouble, data: "hi\\\n"}}, ""},
	{"double string \"hi", "\"hi", tokens{}, "double string \"hi:1:1 Unterminated String"},

	{"single string 'hi'", "'hi'", tokens{{kind: tokenStringSingle, data: "hi"}}, ""},
	{"single string 'hi nl'", "'hi\n'", tokens{{kind: tokenStringSingle, data: "hi\n"}}, ""},
	{"single string 'hi\\''", "'hi\\''", tokens{{kind: tokenStringSingle, data: "hi\\'"}}, ""},
	{"single string 'hi\\nl'", "'hi\\\n'", tokens{{kind: tokenStringSingle, data: "hi\\\n"}}, ""},
	{"single string 'hi", "'hi", tokens{}, "single string 'hi:1:1 Unterminated String"},

	{"assert", "assert", tokens{{kind: tokenAssert, data: "assert"}}, ""},
	{"else", "else", tokens{{kind: tokenElse, data: "else"}}, ""},
	{"error", "error", tokens{{kind: tokenError, data: "error"}}, ""},
	{"false", "false", tokens{{kind: tokenFalse, data: "false"}}, ""},
	{"for", "for", tokens{{kind: tokenFor, data: "for"}}, ""},
	{"function", "function", tokens{{kind: tokenFunction, data: "function"}}, ""},
	{"if", "if", tokens{{kind: tokenIf, data: "if"}}, ""},
	{"import", "import", tokens{{kind: tokenImport, data: "import"}}, ""},
	{"importstr", "importstr", tokens{{kind: tokenImportStr, data: "importstr"}}, ""},
	{"in", "in", tokens{{kind: tokenIn, data: "in"}}, ""},
	{"local", "local", tokens{{kind: tokenLocal, data: "local"}}, ""},
	{"null", "null", tokens{{kind: tokenNullLit, data: "null"}}, ""},
	{"self", "self", tokens{{kind: tokenSelf, data: "self"}}, ""},
	{"super", "super", tokens{{kind: tokenSuper, data: "super"}}, ""},
	{"tailstrict", "tailstrict", tokens{{kind: tokenTailStrict, data: "tailstrict"}}, ""},
	{"then", "then", tokens{{kind: tokenThen, data: "then"}}, ""},
	{"true", "true", tokens{{kind: tokenTrue, data: "true"}}, ""},

	{"identifier", "foobar", tokens{{kind: tokenIdentifier, data: "foobar"}}, ""},

	{"c++ comment", "// hi", tokens{}, ""},  // This test doesn't look at fodder (yet?)
	{"hash comment", "# hi", tokens{}, ""},  // This test doesn't look at fodder (yet?)
	{"c comment", "/* hi */", tokens{}, ""}, // This test doesn't look at fodder (yet?)

	{
		"block string spaces",
		`|||
  test
    more
  |||
    foo
|||`,
		tokens{
			{
				kind:                  tokenStringBlock,
				data:                  "test\n  more\n|||\n  foo\n",
				stringBlockIndent:     "  ",
				stringBlockTermIndent: "",
			},
		},
		"",
	},
	{
		"block string tabs",
		`|||
	test
		more
	|||
		foo
|||`,
		tokens{
			{
				kind:                  tokenStringBlock,
				data:                  "test\n\tmore\n|||\n\tfoo\n",
				stringBlockIndent:     "\t",
				stringBlockTermIndent: "",
			},
		},
		"",
	},
	{
		"block string mixed",
		`|||
	 	test
	 	  more
	 	|||
	 	  foo
|||`,
		tokens{
			{
				kind:                  tokenStringBlock,
				data:                  "test\n  more\n|||\n  foo\n",
				stringBlockIndent:     "\t \t",
				stringBlockTermIndent: "",
			},
		},
		"",
	},
	{
		"block string blanks",
		`|||

  test


    more
  |||
    foo
|||`,
		tokens{
			{
				kind:                  tokenStringBlock,
				data:                  "\ntest\n\n\n  more\n|||\n  foo\n",
				stringBlockIndent:     "  ",
				stringBlockTermIndent: "",
			},
		},
		"",
	},
	{
		"block string bad indent",
		`|||
  test
 foo
|||`,
		tokens{},
		"block string bad indent:1:1 Text block not terminated with |||",
	},
	{
		"block string eof",
		`|||
  test`,
		tokens{},
		"block string eof:1:1 Unexpected EOF",
	},
	{
		"block string not term",
		`|||
  test
`,
		tokens{},
		"block string not term:1:1 Text block not terminated with |||",
	},

{"op *", "*", tokens{{kind: tokenOperator, data: "*"}}, ""},
|
||||||
|
{"op /", "/", tokens{{kind: tokenOperator, data: "/"}}, ""},
|
||||||
|
{"op %", "%", tokens{{kind: tokenOperator, data: "%"}}, ""},
|
||||||
|
{"op &", "&", tokens{{kind: tokenOperator, data: "&"}}, ""},
|
||||||
|
{"op |", "|", tokens{{kind: tokenOperator, data: "|"}}, ""},
|
||||||
|
{"op ^", "^", tokens{{kind: tokenOperator, data: "^"}}, ""},
|
||||||
|
{"op =", "=", tokens{{kind: tokenOperator, data: "="}}, ""},
|
||||||
|
{"op <", "<", tokens{{kind: tokenOperator, data: "<"}}, ""},
|
||||||
|
{"op >", ">", tokens{{kind: tokenOperator, data: ">"}}, ""},
|
||||||
|
{"op >==|", ">==|", tokens{{kind: tokenOperator, data: ">==|"}}, ""},
|
||||||
|
|
||||||
|
{"junk", "💩", tokens{}, "junk:1:1 Could not lex the character '\\U0001f4a9'"},
|
||||||
|
}
|
||||||
|
|
||||||
|
func tokensEqual(ts1, ts2 tokens) bool {
|
||||||
|
if len(ts1) != len(ts2) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := range ts1 {
|
||||||
|
t1, t2 := ts1[i], ts2[i]
|
||||||
|
if t1.kind != t2.kind {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if t1.data != t2.data {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if t1.stringBlockIndent != t2.stringBlockIndent {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if t1.stringBlockTermIndent != t2.stringBlockTermIndent {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLex(t *testing.T) {
|
||||||
|
for _, test := range lexTests {
|
||||||
|
// Copy the test tokens and append an EOF token
|
||||||
|
testTokens := append(tokens(nil), test.tokens...)
|
||||||
|
testTokens = append(testTokens, tEOF)
|
||||||
|
tokens, err := lex(test.name, test.input)
|
||||||
|
var errString string
|
||||||
|
if err != nil {
|
||||||
|
errString = err.Error()
|
||||||
|
}
|
||||||
|
if errString != test.errString {
|
||||||
|
t.Errorf("%s: error result does not match. got\n\t%+v\nexpected\n\t%+v",
|
||||||
|
test.name, errString, test.errString)
|
||||||
|
}
|
||||||
|
if err == nil && !tokensEqual(tokens, testTokens) {
|
||||||
|
t.Errorf("%s: got\n\t%+v\nexpected\n\t%+v", test.name, tokens, testTokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: test fodder, test position reporting
|
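The comment cases above deliberately ignore fodder. If one did want to cover it, a test in this style could pin down where a comment ends up; this is a sketch of a possible follow-up (name and assertions hypothetical), not part of the commit.

package jsonnet

import "testing"

// TestCommentFodder (hypothetical) checks that a lone comment survives as
// fodder attached to the trailing tokenEndOfFile token.
func TestCommentFodder(t *testing.T) {
	toks, err := lex("fodder", "// hi")
	if err != nil {
		t.Fatal(err)
	}
	eof := toks[len(toks)-1]
	if len(eof.fodder) != 1 || eof.fodder[0].kind != fodderCommentCpp {
		t.Fatalf("expected one C++ comment fodder element, got %+v", eof.fodder)
	}
	if eof.fodder[0].data != " hi" {
		t.Errorf("unexpected fodder data: %q", eof.fodder[0].data)
	}
}
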
static_error.go (new file, 95 lines)
@@ -0,0 +1,95 @@
package jsonnet

import (
	"fmt"
)

//////////////////////////////////////////////////////////////////////////////
// Location

// Location represents a single location in an (unspecified) file.
type Location struct {
	Line   int
	Column int
}

// IsSet reports whether this Location has been set.
func (l *Location) IsSet() bool {
	return l.Line != 0
}

func (l *Location) String() string {
	return fmt.Sprintf("%v:%v", l.Line, l.Column)
}

//////////////////////////////////////////////////////////////////////////////
// LocationRange

// LocationRange represents a range of a source file.
type LocationRange struct {
	FileName string
	Begin    Location
	End      Location
}

// IsSet reports whether this LocationRange has been set.
func (lr *LocationRange) IsSet() bool {
	return lr.Begin.IsSet()
}

func (lr *LocationRange) String() string {
	if !lr.IsSet() {
		return lr.FileName
	}

	var filePrefix string
	if len(lr.FileName) > 0 {
		filePrefix = lr.FileName + ":"
	}
	if lr.Begin.Line == lr.End.Line {
		if lr.Begin.Column == lr.End.Column {
			return fmt.Sprintf("%s%v", filePrefix, lr.Begin.String())
		}
		return fmt.Sprintf("%s%v-%v", filePrefix, lr.Begin.String(), lr.End.Column)
	}

	return fmt.Sprintf("%s(%v)-(%v)", filePrefix, lr.Begin.String(), lr.End.String())
}

// makeLocationRangeMessage is useful for special locations, e.g. the
// manifestation entry point.
func makeLocationRangeMessage(msg string) LocationRange {
	return LocationRange{FileName: msg}
}

func makeLocationRange(fn string, begin Location, end Location) LocationRange {
	return LocationRange{FileName: fn, Begin: begin, End: end}
}

//////////////////////////////////////////////////////////////////////////////
// StaticError

// StaticError represents an error during parsing/lexing of some Jsonnet.
type StaticError struct {
	Loc LocationRange
	Msg string
}

func makeStaticErrorMsg(msg string) StaticError {
	return StaticError{Msg: msg}
}

func makeStaticErrorPoint(msg string, fn string, l Location) StaticError {
	return StaticError{Msg: msg, Loc: makeLocationRange(fn, l, l)}
}

func makeStaticError(msg string, lr LocationRange) StaticError {
	return StaticError{Msg: msg, Loc: lr}
}

func (err StaticError) Error() string {
	loc := ""
	if err.Loc.IsSet() {
		loc = err.Loc.String()
	}
	return fmt.Sprintf("%v %v", loc, err.Msg)
}

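To make the three rendering forms of LocationRange.String concrete, here is a short package-internal sketch; the file name and coordinates are invented for illustration.

package jsonnet

import "fmt"

// exampleLocations prints the three forms produced by LocationRange.String:
// a point, a single-line span, and a multi-line span.
func exampleLocations() {
	p := Location{Line: 1, Column: 3}
	fmt.Println(makeLocationRange("f.jsonnet", p, p).String())                            // f.jsonnet:1:3
	fmt.Println(makeLocationRange("f.jsonnet", p, Location{Line: 1, Column: 9}).String()) // f.jsonnet:1:3-9
	fmt.Println(makeLocationRange("f.jsonnet", p, Location{Line: 2, Column: 4}).String()) // f.jsonnet:(1:3)-(2:4)
}
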