Better importing (caching & public API)

* Adds a new API that lets external tools use the VM to import
  files "just as Jsonnet would". This is primarily intended for
  use in static analysis tools.
* Imports are now cached between evaluate calls. This may improve
  performance significantly for some users. I would like to add
  some way of achieving this from the command line in the future.
* An additional layer of caching was added internally, at the AST
  level. This was necessary so that Jsonnet always returns the exact
  same AST (meaning the same pointers) when asked multiple times.
This commit is contained in:
Stanisław Barzowski 2019-09-09 17:59:51 +02:00
parent 8f0e634bb6
commit 21c00f1b9e
3 changed files with 100 additions and 22 deletions

View File

@ -21,6 +21,9 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"path" "path"
"github.com/google/go-jsonnet/ast"
"github.com/google/go-jsonnet/internal/program"
) )
// An Importer imports data from a path. // An Importer imports data from a path.
@ -40,6 +43,10 @@ type Importer interface {
// then all results of all attempts will be cached separately, // then all results of all attempts will be cached separately,
// both nonexistence and contents of existing ones. // both nonexistence and contents of existing ones.
// FileImporter may serve as an example. // FileImporter may serve as an example.
//
// Importing the same file multiple times must be a cheap operation
// and shouldn't involve copying the whole file - the same buffer
// should be returned.
Import(importedFrom, importedPath string) (contents Contents, foundAt string, err error) Import(importedFrom, importedPath string) (contents Contents, foundAt string, err error)
} }
@ -69,6 +76,7 @@ func MakeContents(s string) Contents {
// It also verifies that the content pointer is the same for two foundAt values. // It also verifies that the content pointer is the same for two foundAt values.
type importCache struct { type importCache struct {
foundAtVerification map[string]Contents foundAtVerification map[string]Contents
astCache map[string]ast.Node
codeCache map[string]potentialValue codeCache map[string]potentialValue
importer Importer importer Importer
} }
@ -78,10 +86,15 @@ func makeImportCache(importer Importer) *importCache {
return &importCache{ return &importCache{
importer: importer, importer: importer,
foundAtVerification: make(map[string]Contents), foundAtVerification: make(map[string]Contents),
astCache: make(map[string]ast.Node),
codeCache: make(map[string]potentialValue), codeCache: make(map[string]potentialValue),
} }
} }
// flushValueCache discards every cached evaluation result by swapping
// codeCache for a fresh, empty map. The remaining fields of the cache are
// left untouched.
func (cache *importCache) flushValueCache() {
	emptyValues := make(map[string]potentialValue)
	cache.codeCache = emptyValues
}
func (cache *importCache) importData(importedFrom, importedPath string) (contents Contents, foundAt string, err error) { func (cache *importCache) importData(importedFrom, importedPath string) (contents Contents, foundAt string, err error) {
contents, foundAt, err = cache.importer.Import(importedFrom, importedPath) contents, foundAt, err = cache.importer.Import(importedFrom, importedPath)
if err != nil { if err != nil {
@ -97,6 +110,19 @@ func (cache *importCache) importData(importedFrom, importedPath string) (content
return return
} }
// importAST imports the file referred to by importedPath, as seen from
// importedFrom, and returns its parsed AST together with the path it was
// found at.
//
// Successfully parsed ASTs are cached per foundAt, so importing the same
// file again returns the same node. If the import itself fails, the error is
// returned with a nil node and an empty foundAt. If parsing fails, the error
// is returned along with foundAt and nothing is cached.
func (cache *importCache) importAST(importedFrom, importedPath string) (ast.Node, string, error) {
	contents, foundAt, err := cache.importData(importedFrom, importedPath)
	if err != nil {
		return nil, "", err
	}
	if cachedNode, isCached := cache.astCache[foundAt]; isCached {
		return cachedNode, foundAt, nil
	}
	node, err := program.SnippetToAST(foundAt, contents.String())
	if err != nil {
		// Never store a failed parse: the cache hit above reports a nil
		// error, so a cached failure would hide the parse error from every
		// later import of this file.
		return node, foundAt, err
	}
	cache.astCache[foundAt] = node
	return node, foundAt, nil
}
// ImportString imports a string, caches it and then returns it. // ImportString imports a string, caches it and then returns it.
func (cache *importCache) importString(importedFrom, importedPath string, i *interpreter, trace traceElement) (*valueString, error) { func (cache *importCache) importString(importedFrom, importedPath string, i *interpreter, trace traceElement) (*valueString, error) {
data, _, err := cache.importData(importedFrom, importedPath) data, _, err := cache.importData(importedFrom, importedPath)
@ -107,7 +133,7 @@ func (cache *importCache) importString(importedFrom, importedPath string, i *int
} }
func codeToPV(i *interpreter, filename string, code string) *cachedThunk { func codeToPV(i *interpreter, filename string, code string) *cachedThunk {
node, err := SnippetToAST(filename, code) node, err := program.SnippetToAST(filename, code)
if err != nil { if err != nil {
// TODO(sbarzowski) we should wrap (static) error here // TODO(sbarzowski) we should wrap (static) error here
// within a RuntimeError. Because whether we get this error or not // within a RuntimeError. Because whether we get this error or not
@ -126,14 +152,19 @@ func codeToPV(i *interpreter, filename string, code string) *cachedThunk {
// ImportCode imports code from a path. // ImportCode imports code from a path.
func (cache *importCache) importCode(importedFrom, importedPath string, i *interpreter, trace traceElement) (value, error) { func (cache *importCache) importCode(importedFrom, importedPath string, i *interpreter, trace traceElement) (value, error) {
contents, foundAt, err := cache.importData(importedFrom, importedPath) node, foundAt, err := cache.importAST(importedFrom, importedPath)
if err != nil { if err != nil {
return nil, i.Error(err.Error(), trace) return nil, i.Error(err.Error(), trace)
} }
var pv potentialValue var pv potentialValue
if cachedPV, isCached := cache.codeCache[foundAt]; !isCached { if cachedPV, isCached := cache.codeCache[foundAt]; !isCached {
// File hasn't been parsed and analyzed before, update the cache record. // File hasn't been parsed and analyzed before, update the cache record.
pv = codeToPV(i, foundAt, contents.String()) env := makeInitialEnv(foundAt, i.baseStd)
pv = &cachedThunk{
env: &env,
body: node,
content: nil,
}
cache.codeCache[foundAt] = pv cache.codeCache[foundAt] = pv
} else { } else {
pv = cachedPV pv = cachedPV

View File

@ -1144,10 +1144,10 @@ func buildObject(hide ast.ObjectFieldHide, fields map[string]value) *valueObject
return makeValueSimpleObject(bindingFrame{}, fieldMap, nil, nil) return makeValueSimpleObject(bindingFrame{}, fieldMap, nil, nil)
} }
func buildInterpreter(ext vmExtMap, nativeFuncs map[string]*NativeFunction, maxStack int, importer Importer) (*interpreter, error) { func buildInterpreter(ext vmExtMap, nativeFuncs map[string]*NativeFunction, maxStack int, ic *importCache) (*interpreter, error) {
i := interpreter{ i := interpreter{
stack: makeCallStack(maxStack), stack: makeCallStack(maxStack),
importCache: makeImportCache(importer), importCache: ic,
nativeFuncs: nativeFuncs, nativeFuncs: nativeFuncs,
} }
@ -1210,9 +1210,9 @@ func evaluateAux(i *interpreter, node ast.Node, tla vmExtMap) (value, traceEleme
// TODO(sbarzowski) this function takes far too many arguments - build interpreter in vm instead // TODO(sbarzowski) this function takes far too many arguments - build interpreter in vm instead
func evaluate(node ast.Node, ext vmExtMap, tla vmExtMap, nativeFuncs map[string]*NativeFunction, func evaluate(node ast.Node, ext vmExtMap, tla vmExtMap, nativeFuncs map[string]*NativeFunction,
maxStack int, importer Importer, stringOutputMode bool) (string, error) { maxStack int, ic *importCache, stringOutputMode bool) (string, error) {
i, err := buildInterpreter(ext, nativeFuncs, maxStack, importer) i, err := buildInterpreter(ext, nativeFuncs, maxStack, ic)
if err != nil { if err != nil {
return "", err return "", err
} }
@ -1237,9 +1237,9 @@ func evaluate(node ast.Node, ext vmExtMap, tla vmExtMap, nativeFuncs map[string]
// TODO(sbarzowski) this function takes far too many arguments - build interpreter in vm instead // TODO(sbarzowski) this function takes far too many arguments - build interpreter in vm instead
func evaluateMulti(node ast.Node, ext vmExtMap, tla vmExtMap, nativeFuncs map[string]*NativeFunction, func evaluateMulti(node ast.Node, ext vmExtMap, tla vmExtMap, nativeFuncs map[string]*NativeFunction,
maxStack int, importer Importer, stringOutputMode bool) (map[string]string, error) { maxStack int, ic *importCache, stringOutputMode bool) (map[string]string, error) {
i, err := buildInterpreter(ext, nativeFuncs, maxStack, importer) i, err := buildInterpreter(ext, nativeFuncs, maxStack, ic)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1254,9 +1254,9 @@ func evaluateMulti(node ast.Node, ext vmExtMap, tla vmExtMap, nativeFuncs map[st
// TODO(sbarzowski) this function takes far too many arguments - build interpreter in vm instead // TODO(sbarzowski) this function takes far too many arguments - build interpreter in vm instead
func evaluateStream(node ast.Node, ext vmExtMap, tla vmExtMap, nativeFuncs map[string]*NativeFunction, func evaluateStream(node ast.Node, ext vmExtMap, tla vmExtMap, nativeFuncs map[string]*NativeFunction,
maxStack int, importer Importer) ([]string, error) { maxStack int, ic *importCache) ([]string, error) {
i, err := buildInterpreter(ext, nativeFuncs, maxStack, importer) i, err := buildInterpreter(ext, nativeFuncs, maxStack, ic)
if err != nil { if err != nil {
return nil, err return nil, err
} }

69
vm.go
View File

@ -38,6 +38,7 @@ type VM struct {
importer Importer importer Importer
ErrorFormatter ErrorFormatter ErrorFormatter ErrorFormatter
StringOutput bool StringOutput bool
importCache *importCache
} }
// External variable or top level argument provided before execution // External variable or top level argument provided before execution
@ -53,6 +54,7 @@ type vmExtMap map[string]vmExt
// MakeVM creates a new VM with default parameters. // MakeVM creates a new VM with default parameters.
func MakeVM() *VM { func MakeVM() *VM {
defaultImporter := &FileImporter{}
return &VM{ return &VM{
MaxStack: 500, MaxStack: 500,
ext: make(vmExtMap), ext: make(vmExtMap),
@ -60,32 +62,58 @@ func MakeVM() *VM {
nativeFuncs: make(map[string]*NativeFunction), nativeFuncs: make(map[string]*NativeFunction),
ErrorFormatter: &termErrorFormatter{pretty: false, maxStackTraceSize: 20}, ErrorFormatter: &termErrorFormatter{pretty: false, maxStackTraceSize: 20},
importer: &FileImporter{}, importer: &FileImporter{},
importCache: makeImportCache(defaultImporter),
} }
} }
// flushCache throws away the whole import cache and replaces it with a new
// one built around the VM's current importer. It should be called whenever
// we can no longer be sure that the source files are unchanged, for example
// after the importer has been replaced.
func (vm *VM) flushCache() {
	freshCache := makeImportCache(vm.importer)
	vm.importCache = freshCache
}
// flushValueCache drops the values cached in the VM's import cache. It
// should be called whenever already computed values may have become stale,
// for example after an extVar has changed.
func (vm *VM) flushValueCache() {
	ic := vm.importCache
	ic.flushValueCache()
}
// ExtVar binds a Jsonnet external var to the given value. // ExtVar binds a Jsonnet external var to the given value.
func (vm *VM) ExtVar(key string, val string) { func (vm *VM) ExtVar(key string, val string) {
vm.ext[key] = vmExt{value: val, isCode: false} vm.ext[key] = vmExt{value: val, isCode: false}
vm.flushValueCache()
} }
// ExtCode binds a Jsonnet external code var to the given code. // ExtCode binds a Jsonnet external code var to the given code.
func (vm *VM) ExtCode(key string, val string) { func (vm *VM) ExtCode(key string, val string) {
vm.ext[key] = vmExt{value: val, isCode: true} vm.ext[key] = vmExt{value: val, isCode: true}
vm.flushValueCache()
} }
// TLAVar binds a Jsonnet top level argument to the given value. // TLAVar binds a Jsonnet top level argument to the given value.
func (vm *VM) TLAVar(key string, val string) { func (vm *VM) TLAVar(key string, val string) {
vm.tla[key] = vmExt{value: val, isCode: false} vm.tla[key] = vmExt{value: val, isCode: false}
// Setting a TLA does not require flushing the cache.
// Only the results of evaluation of imported files are cached
// and the TLAs do not affect these unlike extVars.
} }
// TLACode binds a Jsonnet top level argument to the given code. // TLACode binds a Jsonnet top level argument to the given code.
func (vm *VM) TLACode(key string, val string) { func (vm *VM) TLACode(key string, val string) {
vm.tla[key] = vmExt{value: val, isCode: true} vm.tla[key] = vmExt{value: val, isCode: true}
// Setting a TLA does not require flushing the cache - see above.
} }
// Importer sets Importer to use during evaluation (import callback). // Importer sets Importer to use during evaluation (import callback).
func (vm *VM) Importer(i Importer) { func (vm *VM) Importer(i Importer) {
vm.importer = i vm.importer = i
vm.flushCache()
}
// NativeFunction registers a native function.
func (vm *VM) NativeFunction(f *NativeFunction) {
vm.nativeFuncs[f.Name] = f
vm.flushValueCache()
} }
type evalKind int type evalKind int
@ -105,7 +133,7 @@ func (vm *VM) Evaluate(node ast.Node) (val string, err error) {
err = fmt.Errorf("(CRASH) %v\n%s", r, debug.Stack()) err = fmt.Errorf("(CRASH) %v\n%s", r, debug.Stack())
} }
}() }()
return evaluate(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importer, vm.StringOutput) return evaluate(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importCache, vm.StringOutput)
} }
// EvaluateStream evaluates a Jsonnet program given by an Abstract Syntax Tree // EvaluateStream evaluates a Jsonnet program given by an Abstract Syntax Tree
@ -116,7 +144,7 @@ func (vm *VM) EvaluateStream(node ast.Node) (output interface{}, err error) {
err = fmt.Errorf("(CRASH) %v\n%s", r, debug.Stack()) err = fmt.Errorf("(CRASH) %v\n%s", r, debug.Stack())
} }
}() }()
return evaluateStream(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importer) return evaluateStream(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importCache)
} }
// EvaluateMulti evaluates a Jsonnet program given by an Abstract Syntax Tree // EvaluateMulti evaluates a Jsonnet program given by an Abstract Syntax Tree
@ -128,7 +156,7 @@ func (vm *VM) EvaluateMulti(node ast.Node) (output interface{}, err error) {
err = fmt.Errorf("(CRASH) %v\n%s", r, debug.Stack()) err = fmt.Errorf("(CRASH) %v\n%s", r, debug.Stack())
} }
}() }()
return evaluateMulti(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importer, vm.StringOutput) return evaluateMulti(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importCache, vm.StringOutput)
} }
func (vm *VM) evaluateSnippet(filename string, snippet string, kind evalKind) (output interface{}, err error) { func (vm *VM) evaluateSnippet(filename string, snippet string, kind evalKind) (output interface{}, err error) {
@ -143,11 +171,11 @@ func (vm *VM) evaluateSnippet(filename string, snippet string, kind evalKind) (o
} }
switch kind { switch kind {
case evalKindRegular: case evalKindRegular:
output, err = evaluate(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importer, vm.StringOutput) output, err = evaluate(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importCache, vm.StringOutput)
case evalKindMulti: case evalKindMulti:
output, err = evaluateMulti(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importer, vm.StringOutput) output, err = evaluateMulti(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importCache, vm.StringOutput)
case evalKindStream: case evalKindStream:
output, err = evaluateStream(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importer) output, err = evaluateStream(node, vm.ext, vm.tla, vm.nativeFuncs, vm.MaxStack, vm.importCache)
} }
if err != nil { if err != nil {
return "", err return "", err
@ -155,11 +183,6 @@ func (vm *VM) evaluateSnippet(filename string, snippet string, kind evalKind) (o
return output, nil return output, nil
} }
// NativeFunction registers a native function under f.Name, replacing any
// function previously registered with the same name.
func (vm *VM) NativeFunction(f *NativeFunction) {
vm.nativeFuncs[f.Name] = f
}
// EvaluateSnippet evaluates a string containing Jsonnet code, return a JSON // EvaluateSnippet evaluates a string containing Jsonnet code, return a JSON
// string. // string.
// //
@ -199,6 +222,30 @@ func (vm *VM) EvaluateSnippetMulti(filename string, snippet string) (files map[s
return return
} }
// ResolveImport reports the actual path at which the file imported as
// importedPath from importedFrom can be found.
// The lookup goes through the VM's import cache, so the file contents are
// cached right away; this avoids the possibility of the file disappearing
// after it has been checked.
func (vm *VM) ResolveImport(importedFrom, importedPath string) (foundAt string, err error) {
	_, resolved, importErr := vm.importCache.importData(importedFrom, importedPath)
	return resolved, importErr
}
// ImportData fetches the data just as if it had been imported from a Jsonnet
// file located at `importedFrom`. It shares the cache with the actual
// evaluation. When the import fails, contents is the empty string.
func (vm *VM) ImportData(importedFrom, importedPath string) (contents string, foundAt string, err error) {
	var data Contents
	data, foundAt, err = vm.importCache.importData(importedFrom, importedPath)
	if err == nil {
		contents = data.String()
	}
	return contents, foundAt, err
}
// ImportAST fetches the Jsonnet AST just as if it had been imported from a
// Jsonnet file located at `importedFrom`. It shares the cache with the
// actual evaluation.
func (vm *VM) ImportAST(importedFrom, importedPath string) (contents ast.Node, foundAt string, err error) {
	contents, foundAt, err = vm.importCache.importAST(importedFrom, importedPath)
	return contents, foundAt, err
}
// SnippetToAST parses a snippet and returns the resulting AST. // SnippetToAST parses a snippet and returns the resulting AST.
func SnippetToAST(filename string, snippet string) (ast.Node, error) { func SnippetToAST(filename string, snippet string) (ast.Node, error) {
return program.SnippetToAST(filename, snippet) return program.SnippetToAST(filename, snippet)