Initial commit with lexer

2025-09-29 01:11:02 +02:00 · 2016-01-21 13:11:48 -08:00 · 2016-01-21 13:11:48 -08:00 · a04a6cf2e3
commit a04a6cf2e3
5 changed files with 1238 additions and 0 deletions
--- a/202
+++ b/202
@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/README.md
+++ b/README.md
@ -0,0 +1,5 @@
+# go-jsonnet
+
+This is a port of [jsonnet](http://jsonnet.org/) to go.  It is very much a work in progress.
+
+This implementation is largely based on the [jsonnet C++ implementation](https://github.com/google/jsonnet).
--- a/lexer.go
+++ b/lexer.go
@ -0,0 +1,681 @@
+package jsonnet
+
+import (
+	"bytes"
+	"fmt"
+	"strconv"
+	"strings"
+	"unicode/utf8"
+)
+
+//////////////////////////////////////////////////////////////////////////////
+// Fodder
+//
+// Fodder is stuff that is usually thrown away by lexers/preprocessors but is
+// kept so that the source can be round tripped with full fidelity.
+type fodderKind int
+
+// The kinds of fodder recorded by the lexer:
+//   - fodderWhitespace:  a run of whitespace characters
+//   - fodderCommentC:    a /* ... */ comment
+//   - fodderCommentCpp:  a // ... comment
+//   - fodderCommentHash: a # ... comment
+const (
+	fodderWhitespace fodderKind = iota
+	fodderCommentC
+	fodderCommentCpp
+	fodderCommentHash
+)
+
+// fodderElement is a single piece of fodder: its kind together with its text.
+// For comments the lexer stores the text without the comment delimiters.
+type fodderElement struct {
+	kind fodderKind
+	data string
+}
+
+// fodder is the ordered sequence of fodder elements collected ahead of a
+// token.
+type fodder []fodderElement
+
+//////////////////////////////////////////////////////////////////////////////
+// Token
+
+// tokenKind identifies the lexical category of a token.
+type tokenKind int
+
+const (
+	// tokenInvalid is the zero value of tokenKind; no lexing path in this
+	// file emits it.
+	tokenInvalid tokenKind = iota
+
+	// Symbols
+	tokenBraceL
+	tokenBraceR
+	tokenBracketL
+	tokenBracketR
+	tokenColon
+	tokenComma
+	tokenDollar
+	tokenDot
+	tokenParenL
+	tokenParenR
+	tokenSemicolon
+
+	// Arbitrary length lexemes
+	tokenIdentifier
+	tokenNumber
+	tokenOperator
+	tokenStringDouble
+	tokenStringSingle
+	tokenStringBlock
+
+	// Keywords
+	tokenAssert
+	tokenElse
+	tokenError
+	tokenFalse
+	tokenFor
+	tokenFunction
+	tokenIf
+	tokenImport
+	tokenImportStr
+	tokenIn
+	tokenLocal
+	tokenNullLit
+	tokenTailStrict
+	tokenThen
+	tokenSelf
+	tokenSuper
+	tokenTrue
+
+	// A special token that holds line/column information about the end of the
+	// file.
+	tokenEndOfFile
+)
+
+// token is a single lexeme produced by the lexer, together with the fodder
+// that preceded it and the source range it was read from.
+type token struct {
+	kind   tokenKind // The type of the token
+	fodder fodder    // Any fodder that occurs before this token
+	data   string    // Content of the token if it is not a keyword
+
+	// Extra info for when kind == tokenStringBlock
+	stringBlockIndent     string // The sequence of whitespace that indented the block.
+	stringBlockTermIndent string // The whitespace before the closing |||; it never starts with the full stringBlockIndent.
+
+	loc LocationRange
+}
+
+// tokens is the ordered list of tokens produced by lexing one input.
+type tokens []token
+
+//////////////////////////////////////////////////////////////////////////////
+// Helpers
+
+// isUpper reports whether r is an ASCII uppercase letter ('A' through 'Z').
+func isUpper(r rune) bool {
+	return 'A' <= r && r <= 'Z'
+}
+
+// isLower reports whether r is an ASCII lowercase letter ('a' through 'z').
+func isLower(r rune) bool {
+	return 'a' <= r && r <= 'z'
+}
+
+// isNumber reports whether r is an ASCII decimal digit ('0' through '9').
+func isNumber(r rune) bool {
+	return '0' <= r && r <= '9'
+}
+
+// isIdentifierFirst reports whether r may start an identifier: an ASCII
+// letter or an underscore.
+func isIdentifierFirst(r rune) bool {
+	if r == '_' {
+		return true
+	}
+	return isLower(r) || isUpper(r)
+}
+
+// isIdentifier reports whether r may appear inside an identifier: a decimal
+// digit or anything accepted by isIdentifierFirst.
+func isIdentifier(r rune) bool {
+	if isIdentifierFirst(r) {
+		return true
+	}
+	return isNumber(r)
+}
+
+// isSymbol reports whether r is one of the characters that can start or
+// continue a symbol sequence handled by lexSymbol: & | ^ = < > * / % #
+func isSymbol(r rune) bool {
+	return r == '&' || r == '|' || r == '^' || r == '=' ||
+		r == '<' || r == '>' || r == '*' || r == '/' ||
+		r == '%' || r == '#'
+}
+
+// checkWhitespace measures the leading run of spaces and tabs in a and
+// verifies that b begins with exactly the same bytes.  It returns the length
+// of that run when b matches it in full, and 0 when b is too short or differs
+// anywhere within the run.  If a has no whitespace prefix the result is 0.
+func checkWhitespace(a, b string) int {
+	for idx := 0; idx < len(a); idx++ {
+		ch := a[idx]
+		if ch != ' ' && ch != '\t' {
+			// End of a's whitespace prefix; b agreed with all of it.
+			return idx
+		}
+		if idx >= len(b) || b[idx] != ch {
+			// b ended early or diverged inside a's whitespace prefix.
+			return 0
+		}
+	}
+	// a consists solely of whitespace and b matched every byte of it.
+	return len(a)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Lexer
+
+// lexer holds the full state of one lexing run: the input, the cursor and its
+// line bookkeeping, the state needed to un-read one rune, the tokens emitted
+// so far, and the token currently being assembled.
+type lexer struct {
+	fileName string // The file name being lexed, only used for errors
+	input    string // The input string
+
+	pos        int // Current byte position in input
+	lineNumber int // Current line number for pos
+	lineStart  int // Byte position of start of line
+
+	// Cursor state from before the most recent call to 'next', used by
+	// 'backup' and 'prevLocation'.  'backup' sets prevPos to lexEOF to mark
+	// this state as consumed; calling 'backup' or 'prevLocation' while it is
+	// consumed panics.
+	prevPos        int // Byte position of last rune read
+	prevLineNumber int // The line number before last rune read
+	prevLineStart  int // The line start before last rune read
+
+	tokens tokens // The tokens that we've generated so far
+
+	// Information about the token we are working on right now
+	fodder        fodder   // Fodder accumulated since the last emitted token
+	tokenStart    int      // Byte position where the current token begins
+	tokenStartLoc Location // Line/column corresponding to tokenStart
+}
+
+// lexEOF is the sentinel returned by next when the input is exhausted.  It
+// doubles as the "no previous rune" marker stored in lexer.prevPos.
+const lexEOF = -1
+
+// makeLexer returns a lexer positioned at the start of input.  fn is the file
+// name reported in errors.  Line numbering starts at 1 and there is no
+// previous rune to back up to until next has been called.
+func makeLexer(fn string, input string) *lexer {
+	l := new(lexer)
+	l.fileName = fn
+	l.input = input
+	l.lineNumber = 1
+	l.prevLineNumber = 1
+	l.prevPos = lexEOF
+	l.tokenStartLoc = Location{Line: 1, Column: 1}
+	return l
+}
+
+// next consumes and returns the next rune in the input, or lexEOF once the
+// input is exhausted (in which case the cursor does not move).
+//
+// The complete cursor state (byte position, line number and line start) is
+// snapshotted on every call so that a following backup or prevLocation sees
+// the state from immediately before this rune.  Snapshotting the line fields
+// only when a newline is read would leave them stale for every other rune,
+// making backup/peek rewind the line number to the previous line.
+func (l *lexer) next() rune {
+	l.prevPos = l.pos
+	l.prevLineNumber = l.lineNumber
+	l.prevLineStart = l.lineStart
+	if l.pos >= len(l.input) {
+		return lexEOF
+	}
+	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += w
+	if r == '\n' {
+		// The next line begins right after the newline byte.
+		l.lineNumber++
+		l.lineStart = l.pos
+	}
+	return r
+}
+
+// acceptN consumes n runes from the input by calling next n times, discarding
+// the results.  It does nothing when n is zero or negative.
+func (l *lexer) acceptN(n int) {
+	for remaining := n; remaining > 0; remaining-- {
+		l.next()
+	}
+}
+
+// peek reports the rune that the following call to next will return, leaving
+// the cursor where it was.  Because it is implemented as next followed by
+// backup, the lexer has no valid previous rune once peek returns.
+func (l *lexer) peek() rune {
+	upcoming := l.next()
+	l.backup()
+	return upcoming
+}
+
+// backup un-reads the rune returned by the most recent call to next by
+// restoring the cursor state saved by that call.  It may be called at most
+// once per call to next; a second call without an intervening next panics.
+func (l *lexer) backup() {
+	if l.prevPos == lexEOF {
+		panic("backup called with no valid previous rune")
+	}
+	l.pos, l.lineNumber, l.lineStart = l.prevPos, l.prevLineNumber, l.prevLineStart
+	// Mark the saved state as consumed.
+	l.prevPos = lexEOF
+}
+
+// location returns the line and column of the cursor.  The column is the
+// 1-based byte offset of the cursor from the start of the current line.
+func (l *lexer) location() Location {
+	column := l.pos - l.lineStart + 1
+	return Location{Line: l.lineNumber, Column: column}
+}
+
+// prevLocation returns the line and column recorded for the rune most
+// recently returned by next.  It panics when there is no valid previous rune,
+// i.e. before the first call to next or after backup.
+func (l *lexer) prevLocation() Location {
+	if l.prevPos == lexEOF {
+		panic("prevLocation called with no valid previous rune")
+	}
+	column := l.prevPos - l.prevLineStart + 1
+	return Location{Line: l.prevLineNumber, Column: column}
+}
+
+// resetTokenStart moves the start of the token being assembled to the current
+// cursor position, dropping any characters consumed since the previous
+// start.  Accumulated fodder is left untouched.
+func (l *lexer) resetTokenStart() {
+	l.tokenStartLoc = l.location()
+	l.tokenStart = l.pos
+}
+
+// emitFullToken appends a token with the given kind, data and text-block
+// indentation details to the lexer's output.  The token takes the fodder
+// accumulated so far and spans from the current token start to the cursor;
+// the fodder accumulator is then reset.  The token start itself is not
+// moved, so callers reset it themselves.
+func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) {
+	tok := token{
+		kind:                  kind,
+		fodder:                l.fodder,
+		data:                  data,
+		stringBlockIndent:     stringBlockIndent,
+		stringBlockTermIndent: stringBlockTermIndent,
+	}
+	tok.loc = makeLocationRange(l.fileName, l.tokenStartLoc, l.location())
+	l.tokens = append(l.tokens, tok)
+	l.fodder = fodder{}
+}
+
+// emitToken emits a token of the given kind whose data is the input text
+// between the current token start and the cursor, then starts the next token
+// at the cursor.
+func (l *lexer) emitToken(kind tokenKind) {
+	text := l.input[l.tokenStart:l.pos]
+	l.emitFullToken(kind, text, "", "")
+	l.resetTokenStart()
+}
+
+// addWhitespaceFodder records the text between the token start and the
+// cursor as whitespace fodder.  Consecutive whitespace is merged into the
+// trailing whitespace element rather than stored as separate elements.  The
+// token start is then moved to the cursor.
+func (l *lexer) addWhitespaceFodder() {
+	text := l.input[l.tokenStart:l.pos]
+	if n := len(l.fodder); n > 0 && l.fodder[n-1].kind == fodderWhitespace {
+		l.fodder[n-1].data += text
+	} else {
+		l.fodder = append(l.fodder, fodderElement{kind: fodderWhitespace, data: text})
+	}
+	l.resetTokenStart()
+}
+
+// addCommentFodder records the text between the token start and the cursor
+// as a comment fodder element of the given kind, then moves the token start
+// to the cursor.
+func (l *lexer) addCommentFodder(kind fodderKind) {
+	l.addFodder(kind, l.input[l.tokenStart:l.pos])
+	l.resetTokenStart()
+}
+
+// addFodder appends a fodder element with the given kind and text.  Unlike
+// the other fodder helpers it does not touch the token start.
+func (l *lexer) addFodder(kind fodderKind, data string) {
+	elem := fodderElement{kind: kind, data: data}
+	l.fodder = append(l.fodder, elem)
+}
+
+// lexNumber will consume a number and emit a token.  It is assumed
+// that the next rune to be served by the lexer will be a leading digit.
+//
+// On success a tokenNumber token is emitted and nil is returned.  A static
+// error pointing at the offending rune is returned when the input does not
+// start with a digit, or when a decimal point, an exponent marker or an
+// exponent sign is not followed by a digit.
+func (l *lexer) lexNumber() error {
+	// This function should be understood with reference to the linked image:
+	// http://www.json.org/number.gif
+
+	// Note, we deviate from the json.org documentation as follows:
+	// There is no reason to lex negative numbers as atomic tokens, it is better to parse them
+	// as a unary operator combined with a numeric literal.  This avoids x-1 being tokenized as
+	// <identifier> <number> instead of the intended <identifier> <binop> <number>.
+
+	// numLexState names the position in the number grammar reached so far.
+	type numLexState int
+	const (
+		numBegin numLexState = iota
+		numAfterZero
+		numAfterOneToNine
+		numAfterDot
+		numAfterDigit
+		numAfterE
+		numAfterExpSign
+		numAfterExpDigit
+	)
+
+	state := numBegin
+	for true {
+		r := l.next()
+		switch state {
+		case numBegin:
+			switch {
+			case r == '0':
+				state = numAfterZero
+			case r >= '1' && r <= '9':
+				state = numAfterOneToNine
+			default:
+				return makeStaticErrorPoint(
+					"Couldn't lex number", l.fileName, l.prevLocation())
+			}
+		case numAfterZero:
+			// A leading zero cannot be followed by further integer digits.
+			switch r {
+			case '.':
+				state = numAfterDot
+			case 'e', 'E':
+				state = numAfterE
+			default:
+				goto end
+			}
+		case numAfterOneToNine:
+			switch {
+			case r == '.':
+				state = numAfterDot
+			case r == 'e' || r == 'E':
+				state = numAfterE
+			case r >= '0' && r <= '9':
+				state = numAfterOneToNine
+			default:
+				goto end
+			}
+		case numAfterDot:
+			// At least one digit is required after the decimal point.
+			switch {
+			case r >= '0' && r <= '9':
+				state = numAfterDigit
+			default:
+				return makeStaticErrorPoint(
+					fmt.Sprintf("Couldn't lex number, junk after decimal point: %v", strconv.QuoteRuneToASCII(r)),
+					l.fileName, l.prevLocation())
+			}
+		case numAfterDigit:
+			switch {
+			case r == 'e' || r == 'E':
+				state = numAfterE
+			case r >= '0' && r <= '9':
+				state = numAfterDigit
+			default:
+				goto end
+			}
+		case numAfterE:
+			// The exponent marker must be followed by a sign or a digit.
+			switch {
+			case r == '+' || r == '-':
+				state = numAfterExpSign
+			case r >= '0' && r <= '9':
+				state = numAfterExpDigit
+			default:
+				return makeStaticErrorPoint(
+					fmt.Sprintf("Couldn't lex number, junk after 'E': %v", strconv.QuoteRuneToASCII(r)),
+					l.fileName, l.prevLocation())
+			}
+		case numAfterExpSign:
+			if r >= '0' && r <= '9' {
+				state = numAfterExpDigit
+			} else {
+				return makeStaticErrorPoint(
+					fmt.Sprintf("Couldn't lex number, junk after exponent sign: %v", strconv.QuoteRuneToASCII(r)),
+					l.fileName, l.prevLocation())
+			}
+
+		case numAfterExpDigit:
+			if r >= '0' && r <= '9' {
+				state = numAfterExpDigit
+			} else {
+				goto end
+			}
+		}
+	}
+end:
+	// The rune that terminated the number has already been consumed; un-read
+	// it so that it is not included in the token.
+	l.backup()
+	l.emitToken(tokenNumber)
+	return nil
+}
+
+// lexIdentifier consumes an identifier and emits a token for it.  It is
+// assumed that the next rune to be served by the lexer is a valid first
+// identifier character (an ASCII letter or underscore); anything else panics.
+// The emitted token has the matching keyword kind when the text is a reserved
+// word, and tokenIdentifier otherwise.
+func (l *lexer) lexIdentifier() {
+	first := l.next()
+	if !isIdentifierFirst(first) {
+		panic("Unexpected character in lexIdentifier")
+	}
+	// Keep reading until a rune that cannot be part of an identifier (or
+	// EOF) has been consumed, then un-read that terminating rune.
+	for cur := first; cur != lexEOF && isIdentifier(cur); {
+		cur = l.next()
+	}
+	l.backup()
+
+	// Anything that is not a keyword is an ordinary identifier.
+	kind := tokenIdentifier
+	switch l.input[l.tokenStart:l.pos] {
+	case "assert":
+		kind = tokenAssert
+	case "else":
+		kind = tokenElse
+	case "error":
+		kind = tokenError
+	case "false":
+		kind = tokenFalse
+	case "for":
+		kind = tokenFor
+	case "function":
+		kind = tokenFunction
+	case "if":
+		kind = tokenIf
+	case "import":
+		kind = tokenImport
+	case "importstr":
+		kind = tokenImportStr
+	case "in":
+		kind = tokenIn
+	case "local":
+		kind = tokenLocal
+	case "null":
+		kind = tokenNullLit
+	case "self":
+		kind = tokenSelf
+	case "super":
+		kind = tokenSuper
+	case "tailstrict":
+		kind = tokenTailStrict
+	case "then":
+		kind = tokenThen
+	case "true":
+		kind = tokenTrue
+	}
+	l.emitToken(kind)
+}
+
+// lexSymbol will lex a token that starts with a symbol.  This could be a
+// comment, block quote or an operator.  This function assumes that the next
+// rune to be served by the lexer will be the first rune of the new token.
+//
+// Comments are recorded as fodder rather than emitted as tokens.  A static
+// error is returned for an unterminated /* comment and for a malformed or
+// unterminated ||| text block; otherwise the result is nil.
+func (l *lexer) lexSymbol() error {
+	r := l.next()
+
+	// Single line C++ style comment
+	if r == '/' && l.peek() == '/' {
+		l.next()
+		l.resetTokenStart() // Throw out the leading //
+		for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
+		}
+		// Leave the '\n' in the lexer to be fodder for the next round
+		l.backup()
+		l.addCommentFodder(fodderCommentCpp)
+		return nil
+	}
+
+	// Single line hash comment
+	if r == '#' {
+		l.resetTokenStart() // Throw out the leading #
+		for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
+		}
+		// Leave the '\n' in the lexer to be fodder for the next round
+		l.backup()
+		l.addCommentFodder(fodderCommentHash)
+		return nil
+	}
+
+	// Multi-line C style comment
+	if r == '/' && l.peek() == '*' {
+		// Remember where the comment opened so an unterminated comment is
+		// reported at its start rather than at EOF.
+		commentStartLoc := l.tokenStartLoc
+		l.next()            // consume the '*'
+		l.resetTokenStart() // Throw out the leading /*
+		for r = l.next(); ; r = l.next() {
+			if r == lexEOF {
+				return makeStaticErrorPoint("Multi-line comment has no terminating */.",
+					l.fileName, commentStartLoc)
+			}
+			if r == '*' && l.peek() == '/' {
+				commentData := l.input[l.tokenStart : l.pos-1] // Don't include trailing */
+				l.addFodder(fodderCommentC, commentData)
+				l.next()            // Skip past '/'
+				l.resetTokenStart() // Start next token at this point
+				return nil
+			}
+		}
+	}
+
+	// Text block: "|||" immediately followed by a newline.  The first '|' has
+	// already been consumed, so only "||\n" remains to be matched.
+	if r == '|' && strings.HasPrefix(l.input[l.pos:], "||\n") {
+		commentStartLoc := l.tokenStartLoc
+		l.acceptN(3) // Skip "||\n"
+		var cb bytes.Buffer
+
+		// Skip leading blank lines
+		for r = l.next(); r == '\n'; r = l.next() {
+			cb.WriteRune(r)
+		}
+		l.backup()
+		// Comparing the remaining input with itself yields the length of the
+		// first line's leading space/tab run, which becomes the block indent.
+		numWhiteSpace := checkWhitespace(l.input[l.pos:], l.input[l.pos:])
+		stringBlockIndent := l.input[l.pos : l.pos+numWhiteSpace]
+		if numWhiteSpace == 0 {
+			return makeStaticErrorPoint("Text block's first line must start with whitespace.",
+				l.fileName, commentStartLoc)
+		}
+
+		for {
+			if numWhiteSpace <= 0 {
+				panic("Unexpected value for numWhiteSpace")
+			}
+			// Drop the indent, then copy the rest of the line into the block.
+			l.acceptN(numWhiteSpace)
+			for r = l.next(); r != '\n'; r = l.next() {
+				if r == lexEOF {
+					return makeStaticErrorPoint("Unexpected EOF",
+						l.fileName, commentStartLoc)
+				}
+				cb.WriteRune(r)
+			}
+			cb.WriteRune('\n')
+
+			// Skip any blank lines
+			for r = l.next(); r == '\n'; r = l.next() {
+				cb.WriteRune(r)
+			}
+			l.backup()
+
+			// Look at the next line
+			numWhiteSpace = checkWhitespace(stringBlockIndent, l.input[l.pos:])
+			if numWhiteSpace == 0 {
+				// End of the text block
+				// The line does not start with the block indent, so it must
+				// be the terminator: optional spaces/tabs followed by "|||".
+				var stringBlockTermIndent string
+				for r = l.next(); r == ' ' || r == '\t'; r = l.next() {
+					stringBlockTermIndent += string(r)
+				}
+				l.backup()
+				if !strings.HasPrefix(l.input[l.pos:], "|||") {
+					return makeStaticErrorPoint("Text block not terminated with |||",
+						l.fileName, commentStartLoc)
+				}
+				l.acceptN(3) // Skip '|||'
+				l.emitFullToken(tokenStringBlock, cb.String(),
+					stringBlockIndent, stringBlockTermIndent)
+				l.resetTokenStart()
+				return nil
+			}
+		}
+	}
+
+	// Assume any string of symbols is a single operator.
+	for r = l.next(); isSymbol(r); r = l.next() {
+
+	}
+	// Un-read the first rune that is not part of the operator.
+	l.backup()
+	l.emitToken(tokenOperator)
+	return nil
+}
+
+// lex tokenizes input and returns the resulting tokens.  fn is the file name
+// used in error locations.
+//
+// On success the returned slice always ends with a tokenEndOfFile token,
+// which carries any fodder that trailed the last real token.  On failure the
+// token slice is nil and the error describes the first problem found: an
+// unterminated string, a character that cannot start any token, or an error
+// reported by lexNumber or lexSymbol.
+func lex(fn string, input string) (tokens, error) {
+	l := makeLexer(fn, input)
+
+	var err error
+
+	for r := l.next(); r != lexEOF; r = l.next() {
+		switch r {
+		case ' ', '\t', '\r', '\n':
+			l.addWhitespaceFodder()
+			continue
+		case '{':
+			l.emitToken(tokenBraceL)
+		case '}':
+			l.emitToken(tokenBraceR)
+		case '[':
+			l.emitToken(tokenBracketL)
+		case ']':
+			l.emitToken(tokenBracketR)
+		case ':':
+			l.emitToken(tokenColon)
+		case ',':
+			l.emitToken(tokenComma)
+		case '$':
+			l.emitToken(tokenDollar)
+		case '.':
+			l.emitToken(tokenDot)
+		case '(':
+			l.emitToken(tokenParenL)
+		case ')':
+			l.emitToken(tokenParenR)
+		case ';':
+			l.emitToken(tokenSemicolon)
+
+			// Operators
+		case '!':
+			// Either "!" or "!=".
+			if l.peek() == '=' {
+				_ = l.next()
+			}
+			l.emitToken(tokenOperator)
+		case '~', '+', '-':
+			l.emitToken(tokenOperator)
+
+		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			// lexNumber expects to read the leading digit itself.
+			l.backup()
+			err = l.lexNumber()
+			if err != nil {
+				return nil, err
+			}
+
+			// String literals
+		case '"':
+			stringStartLoc := l.prevLocation()
+			l.resetTokenStart() // Don't include the quotes in the token data
+			for r = l.next(); ; r = l.next() {
+				if r == lexEOF {
+					return nil, makeStaticErrorPoint("Unterminated String", l.fileName, stringStartLoc)
+				}
+				if r == '"' {
+					// Un-read the closing quote so the token data stops
+					// before it, then consume it again after emitting.
+					l.backup()
+					l.emitToken(tokenStringDouble)
+					_ = l.next()
+					l.resetTokenStart()
+					break
+				}
+				// Skip the rune after a backslash so that an escaped quote
+				// does not terminate the string.
+				if r == '\\' && l.peek() != lexEOF {
+					r = l.next()
+				}
+			}
+		case '\'':
+			stringStartLoc := l.prevLocation()
+			l.resetTokenStart() // Don't include the quotes in the token data
+			for r = l.next(); ; r = l.next() {
+				if r == lexEOF {
+					return nil, makeStaticErrorPoint("Unterminated String", l.fileName, stringStartLoc)
+				}
+				if r == '\'' {
+					// Un-read the closing quote so the token data stops
+					// before it, then consume it again after emitting.
+					l.backup()
+					l.emitToken(tokenStringSingle)
+					r = l.next()
+					l.resetTokenStart()
+					break
+				}
+				// Skip the rune after a backslash so that an escaped quote
+				// does not terminate the string.
+				if r == '\\' && l.peek() != lexEOF {
+					r = l.next()
+				}
+			}
+		default:
+			// The helpers below expect to read the first rune themselves.
+			if isIdentifierFirst(r) {
+				l.backup()
+				l.lexIdentifier()
+			} else if isSymbol(r) {
+				l.backup()
+				err = l.lexSymbol()
+				if err != nil {
+					return nil, err
+				}
+			} else {
+				return nil, makeStaticErrorPoint(
+					fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)),
+					l.fileName, l.prevLocation())
+			}
+
+		}
+	}
+
+	// We are currently at the EOF.  Emit a special token to capture any
+	// trailing fodder
+	l.emitToken(tokenEndOfFile)
+	return l.tokens, nil
+}
--- a/lexer_test.go
+++ b/lexer_test.go
@ -0,0 +1,255 @@
+package jsonnet
+
+import (
+	"testing"
+)
+
+// lexTest is one table-driven case for TestLex.
+type lexTest struct {
+	name      string // case label; TestLex also passes it as the first argument to lex, and it prefixes the expected error locations
+	input     string // source text handed to lex
+	tokens    tokens // expected tokens, excluding the end-of-file token that TestLex appends
+	errString string // expected err.Error() text; "" when lex must return no error
+}
+
+var (
+	// tEOF is the end-of-file token that TestLex appends to every expected
+	// token list before comparing.
+	tEOF = token{kind: tokenEndOfFile}
+)
+
+// lexTests is the table of cases run by TestLex. Entries are positional
+// lexTest literals: name, input, expected tokens, expected error string.
+// The expected token lists omit the end-of-file token; TestLex appends tEOF
+// before comparing.
+var lexTests = []lexTest{
+	{"empty", "", tokens{}, ""},
+	{"whitespace", "  \t\n\r\r\n", tokens{}, ""},
+
+	{"brace L", "{", tokens{{kind: tokenBraceL, data: "{"}}, ""},
+	{"brace R", "}", tokens{{kind: tokenBraceR, data: "}"}}, ""},
+	{"bracket L", "[", tokens{{kind: tokenBracketL, data: "["}}, ""},
+	{"bracket R", "]", tokens{{kind: tokenBracketR, data: "]"}}, ""},
+	{"colon", ":", tokens{{kind: tokenColon, data: ":"}}, ""},
+	{"comma", ",", tokens{{kind: tokenComma, data: ","}}, ""},
+	{"dollar", "$", tokens{{kind: tokenDollar, data: "$"}}, ""},
+	{"dot", ".", tokens{{kind: tokenDot, data: "."}}, ""},
+	{"paren L", "(", tokens{{kind: tokenParenL, data: "("}}, ""},
+	{"paren R", ")", tokens{{kind: tokenParenR, data: ")"}}, ""},
+	{"semicolon", ";", tokens{{kind: tokenSemicolon, data: ";"}}, ""},
+
+	{"not 1", "!", tokens{{kind: tokenOperator, data: "!"}}, ""},
+	{"not 2", "! ", tokens{{kind: tokenOperator, data: "!"}}, ""},
+	{"not equal", "!=", tokens{{kind: tokenOperator, data: "!="}}, ""},
+	{"tilde", "~", tokens{{kind: tokenOperator, data: "~"}}, ""},
+	{"plus", "+", tokens{{kind: tokenOperator, data: "+"}}, ""},
+	{"minus", "-", tokens{{kind: tokenOperator, data: "-"}}, ""},
+
+	{"number 0", "0", tokens{{kind: tokenNumber, data: "0"}}, ""},
+	{"number 1", "1", tokens{{kind: tokenNumber, data: "1"}}, ""},
+	{"number 1.0", "1.0", tokens{{kind: tokenNumber, data: "1.0"}}, ""},
+	{"number 0.1", "0.1", tokens{{kind: tokenNumber, data: "0.1"}}, ""},
+	{"number 0e100", "0e100", tokens{{kind: tokenNumber, data: "0e100"}}, ""},
+	{"number 1e100", "1e100", tokens{{kind: tokenNumber, data: "1e100"}}, ""},
+	{"number 1.1e100", "1.1e100", tokens{{kind: tokenNumber, data: "1.1e100"}}, ""},
+	{"number 1.1e-100", "1.1e-100", tokens{{kind: tokenNumber, data: "1.1e-100"}}, ""},
+	{"number 1.1e+100", "1.1e+100", tokens{{kind: tokenNumber, data: "1.1e+100"}}, ""},
+	{"number 0100", "0100", tokens{
+		{kind: tokenNumber, data: "0"},
+		{kind: tokenNumber, data: "100"},
+	}, ""},
+	{"number 10+10", "10+10", tokens{
+		{kind: tokenNumber, data: "10"},
+		{kind: tokenOperator, data: "+"},
+		{kind: tokenNumber, data: "10"},
+	}, ""},
+	{"number 1.+3", "1.+3", tokens{}, "number 1.+3:1:3 Couldn't lex number, junk after decimal point: '+'"},
+	{"number 1e!", "1e!", tokens{}, "number 1e!:1:3 Couldn't lex number, junk after 'E': '!'"},
+	{"number 1e+!", "1e+!", tokens{}, "number 1e+!:1:4 Couldn't lex number, junk after exponent sign: '!'"},
+
+	{"double string \"hi\"", "\"hi\"", tokens{{kind: tokenStringDouble, data: "hi"}}, ""},
+	{"double string \"hi nl\"", "\"hi\n\"", tokens{{kind: tokenStringDouble, data: "hi\n"}}, ""},
+	{"double string \"hi\\\"\"", "\"hi\\\"\"", tokens{{kind: tokenStringDouble, data: "hi\\\""}}, ""},
+	{"double string \"hi\\nl\"", "\"hi\\\n\"", tokens{{kind: tokenStringDouble, data: "hi\\\n"}}, ""},
+	{"double string \"hi", "\"hi", tokens{}, "double string \"hi:1:1 Unterminated String"},
+
+	{"single string 'hi'", "'hi'", tokens{{kind: tokenStringSingle, data: "hi"}}, ""},
+	{"single string 'hi nl'", "'hi\n'", tokens{{kind: tokenStringSingle, data: "hi\n"}}, ""},
+	{"single string 'hi\\''", "'hi\\''", tokens{{kind: tokenStringSingle, data: "hi\\'"}}, ""},
+	{"single string 'hi\\nl'", "'hi\\\n'", tokens{{kind: tokenStringSingle, data: "hi\\\n"}}, ""},
+	{"single string 'hi", "'hi", tokens{}, "single string 'hi:1:1 Unterminated String"},
+
+	{"assert", "assert", tokens{{kind: tokenAssert, data: "assert"}}, ""},
+	{"else", "else", tokens{{kind: tokenElse, data: "else"}}, ""},
+	{"error", "error", tokens{{kind: tokenError, data: "error"}}, ""},
+	{"false", "false", tokens{{kind: tokenFalse, data: "false"}}, ""},
+	{"for", "for", tokens{{kind: tokenFor, data: "for"}}, ""},
+	{"function", "function", tokens{{kind: tokenFunction, data: "function"}}, ""},
+	{"if", "if", tokens{{kind: tokenIf, data: "if"}}, ""},
+	{"import", "import", tokens{{kind: tokenImport, data: "import"}}, ""},
+	{"importstr", "importstr", tokens{{kind: tokenImportStr, data: "importstr"}}, ""},
+	{"in", "in", tokens{{kind: tokenIn, data: "in"}}, ""},
+	{"local", "local", tokens{{kind: tokenLocal, data: "local"}}, ""},
+	{"null", "null", tokens{{kind: tokenNullLit, data: "null"}}, ""},
+	{"self", "self", tokens{{kind: tokenSelf, data: "self"}}, ""},
+	{"super", "super", tokens{{kind: tokenSuper, data: "super"}}, ""},
+	{"tailstrict", "tailstrict", tokens{{kind: tokenTailStrict, data: "tailstrict"}}, ""},
+	{"then", "then", tokens{{kind: tokenThen, data: "then"}}, ""},
+	{"true", "true", tokens{{kind: tokenTrue, data: "true"}}, ""},
+
+	{"identifier", "foobar", tokens{{kind: tokenIdentifier, data: "foobar"}}, ""},
+
+	{"c++ comment", "// hi", tokens{}, ""},  // This test doesn't look at fodder (yet?)
+	{"hash comment", "# hi", tokens{}, ""},  // This test doesn't look at fodder (yet?)
+	{"c comment", "/* hi */", tokens{}, ""}, // This test doesn't look at fodder (yet?)
+
+	{
+		"block string spaces",
+		`|||
+  test
+    more
+  |||
+    foo
+|||`,
+		tokens{
+			{
+				kind:                  tokenStringBlock,
+				data:                  "test\n  more\n|||\n  foo\n",
+				stringBlockIndent:     "  ",
+				stringBlockTermIndent: "",
+			},
+		},
+		"",
+	},
+	{
+		"block string tabs",
+		`|||
+	test
+	  more
+	|||
+	  foo
+|||`,
+		tokens{
+			{
+				kind:                  tokenStringBlock,
+				data:                  "test\n  more\n|||\n  foo\n",
+				stringBlockIndent:     "\t",
+				stringBlockTermIndent: "",
+			},
+		},
+		"",
+	},
+	{
+		"block string mixed",
+		`|||
+	  	test
+	  	  more
+	  	|||
+	  	  foo
+|||`,
+		tokens{
+			{
+				kind:                  tokenStringBlock,
+				data:                  "test\n  more\n|||\n  foo\n",
+				stringBlockIndent:     "\t  \t",
+				stringBlockTermIndent: "",
+			},
+		},
+		"",
+	},
+	{
+		"block string blanks",
+		`|||
+
+  test
+
+
+    more
+  |||
+    foo
+|||`,
+		tokens{
+			{
+				kind:                  tokenStringBlock,
+				data:                  "\ntest\n\n\n  more\n|||\n  foo\n",
+				stringBlockIndent:     "  ",
+				stringBlockTermIndent: "",
+			},
+		},
+		"",
+	},
+	{
+		"block string bad indent",
+		`|||
+  test
+ foo
+|||`,
+		tokens{},
+		"block string bad indent:1:1 Text block not terminated with |||",
+	},
+	{
+		"block string eof",
+		`|||
+  test`,
+		tokens{},
+		"block string eof:1:1 Unexpected EOF",
+	},
+	{
+		"block string not term",
+		`|||
+  test
+`,
+		tokens{},
+		"block string not term:1:1 Text block not terminated with |||",
+	},
+
+	{"op *", "*", tokens{{kind: tokenOperator, data: "*"}}, ""},
+	{"op /", "/", tokens{{kind: tokenOperator, data: "/"}}, ""},
+	{"op %", "%", tokens{{kind: tokenOperator, data: "%"}}, ""},
+	{"op &", "&", tokens{{kind: tokenOperator, data: "&"}}, ""},
+	{"op |", "|", tokens{{kind: tokenOperator, data: "|"}}, ""},
+	{"op ^", "^", tokens{{kind: tokenOperator, data: "^"}}, ""},
+	{"op =", "=", tokens{{kind: tokenOperator, data: "="}}, ""},
+	{"op <", "<", tokens{{kind: tokenOperator, data: "<"}}, ""},
+	{"op >", ">", tokens{{kind: tokenOperator, data: ">"}}, ""},
+	{"op >==|", ">==|", tokens{{kind: tokenOperator, data: ">==|"}}, ""},
+
+	{"junk", "💩", tokens{}, "junk:1:1 Could not lex the character '\\U0001f4a9'"},
+}
+
+// tokensEqual reports whether ts1 and ts2 have the same length and agree,
+// position by position, on kind, data, stringBlockIndent and
+// stringBlockTermIndent. No other token fields are compared.
+func tokensEqual(ts1, ts2 tokens) bool {
+	if len(ts1) != len(ts2) {
+		return false
+	}
+	for i, a := range ts1 {
+		b := ts2[i]
+		same := a.kind == b.kind &&
+			a.data == b.data &&
+			a.stringBlockIndent == b.stringBlockIndent &&
+			a.stringBlockTermIndent == b.stringBlockTermIndent
+		if !same {
+			return false
+		}
+	}
+	return true
+}
+
+// TestLex runs every case in lexTests through lex, using the case name as
+// the first argument, and checks both the returned error text and, when lex
+// succeeds, the produced tokens.
+func TestLex(t *testing.T) {
+	for _, tc := range lexTests {
+		// Expected output is the case's tokens followed by the EOF token;
+		// build it in a fresh slice so the table entry is never modified.
+		want := make(tokens, 0, len(tc.tokens)+1)
+		want = append(want, tc.tokens...)
+		want = append(want, tEOF)
+
+		got, err := lex(tc.name, tc.input)
+
+		gotErr := ""
+		if err != nil {
+			gotErr = err.Error()
+		}
+		if gotErr != tc.errString {
+			t.Errorf("%s: error result does not match. got\n\t%+v\nexpected\n\t%+v",
+				tc.name, gotErr, tc.errString)
+		}
+
+		// Tokens are only compared when lexing succeeded.
+		if err != nil {
+			continue
+		}
+		if !tokensEqual(got, want) {
+			t.Errorf("%s: got\n\t%+v\nexpected\n\t%+v", tc.name, got, want)
+		}
+	}
+}
+
+// TODO: test fodder, test position reporting
--- a/static_error.go
+++ b/static_error.go
@ -0,0 +1,95 @@
+package jsonnet
+
+import (
+	"fmt"
+)
+
+//////////////////////////////////////////////////////////////////////////////
+// Location
+
+// Location is a line/column position within a source file; the file itself
+// is not recorded here.
+type Location struct {
+	Line   int
+	Column int
+}
+
+// IsSet reports whether this Location has been set, i.e. whether Line is
+// non-zero.
+func (l *Location) IsSet() bool {
+	unset := l.Line == 0
+	return !unset
+}
+
+// String renders the location as "Line:Column".
+func (l *Location) String() string {
+	line, col := l.Line, l.Column
+	return fmt.Sprintf("%d:%d", line, col)
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// LocationRange
+
+// LocationRange is a span of a source file: the file name plus the Begin
+// and End locations of the span.
+type LocationRange struct {
+	FileName string
+	Begin    Location
+	End      Location
+}
+
+// IsSet reports whether this LocationRange has been set, which is decided
+// solely by whether Begin is set.
+func (lr *LocationRange) IsSet() bool {
+	begin := &lr.Begin
+	return begin.IsSet()
+}
+
+// String formats the range for display. An unset range yields just
+// FileName. Otherwise the result is an optional "FileName:" prefix followed
+// by one of:
+//
+//	line:col                  Begin and End coincide
+//	line:col-endcol           same line, different columns
+//	(line:col)-(line:col)     different lines
+func (lr *LocationRange) String() string {
+	if !lr.IsSet() {
+		return lr.FileName
+	}
+
+	prefix := ""
+	if lr.FileName != "" {
+		prefix = lr.FileName + ":"
+	}
+
+	begin := lr.Begin.String()
+	switch {
+	case lr.Begin.Line != lr.End.Line:
+		return fmt.Sprintf("%s(%v)-(%v)", prefix, begin, lr.End.String())
+	case lr.Begin.Column != lr.End.Column:
+		return fmt.Sprintf("%s%v-%v", prefix, begin, lr.End.Column)
+	default:
+		return fmt.Sprintf("%s%v", prefix, begin)
+	}
+}
+
+// makeLocationRangeMessage builds a LocationRange that carries only msg, in
+// its FileName field, and leaves Begin and End at their zero values. This is
+// useful for special locations, e.g. manifestation entry point.
+func makeLocationRangeMessage(msg string) LocationRange {
+	var lr LocationRange
+	lr.FileName = msg
+	return lr
+}
+
+// makeLocationRange builds the LocationRange for file fn spanning begin to
+// end.
+func makeLocationRange(fn string, begin Location, end Location) LocationRange {
+	var lr LocationRange
+	lr.FileName = fn
+	lr.Begin = begin
+	lr.End = end
+	return lr
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// StaticError
+
+// StaticError represents an error during parsing/lexing some jsonnet.
+type StaticError struct {
+	Loc LocationRange // where the error occurred; may be left unset
+	Msg string        // description of the error
+}
+
+// makeStaticErrorMsg builds a StaticError that has a message but no
+// location.
+func makeStaticErrorMsg(msg string) StaticError {
+	var e StaticError
+	e.Msg = msg
+	return e
+}
+
+// makeStaticErrorPoint builds a StaticError located at the single point l in
+// file fn; the location range begins and ends at l.
+func makeStaticErrorPoint(msg string, fn string, l Location) StaticError {
+	point := makeLocationRange(fn, l, l)
+	return StaticError{Loc: point, Msg: msg}
+}
+
+// makeStaticError builds a StaticError with message msg at location range
+// lr.
+func makeStaticError(msg string, lr LocationRange) StaticError {
+	var e StaticError
+	e.Loc = lr
+	e.Msg = msg
+	return e
+}
+
+// Error implements the error interface. The result is the location string,
+// a single space, then the message; when the location is not set the
+// location part is empty, so the result begins with the space.
+func (err StaticError) Error() string {
+	var where string
+	if rng := err.Loc; rng.IsSet() {
+		where = rng.String()
+	}
+	return fmt.Sprintf("%s %s", where, err.Msg)
+}