From fe82a78401d2f07b6fd858a4089fb5789cb37039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Barzowski?= Date: Tue, 26 Feb 2019 11:45:53 +0100 Subject: [PATCH] Adaptive string representation When adding long strings, don't copy them immediately. Instead build long strings only when their contents are requested. This allows to build a long string from parts, using a regular operator+ in linear time. This lets users to worry much less about using std.join etc. If indexing the string is mixed with building it using operator+ the behavior can still be quadratic. We may want to address it in a later change. --- builtins.go | 58 ++++++++--------- imports.go | 2 +- interpreter.go | 30 ++++----- value.go | 172 +++++++++++++++++++++++++++++++++++++------------ 4 files changed, 177 insertions(+), 85 deletions(-) diff --git a/builtins.go b/builtins.go index 5be1bc4..7194f1b 100644 --- a/builtins.go +++ b/builtins.go @@ -33,12 +33,12 @@ import ( func builtinPlus(i *interpreter, trace traceElement, x, y value) (value, error) { // TODO(sbarzowski) perhaps a more elegant way to dispatch switch right := y.(type) { - case *valueString: + case valueString: left, err := builtinToString(i, trace, x) if err != nil { return nil, err } - return concatStrings(left.(*valueString), right), nil + return concatStrings(left.(valueString), right), nil } switch left := x.(type) { @@ -48,12 +48,12 @@ func builtinPlus(i *interpreter, trace traceElement, x, y value) (value, error) return nil, err } return makeDoubleCheck(i, trace, left.value+right.value) - case *valueString: + case valueString: right, err := builtinToString(i, trace, y) if err != nil { return nil, err } - return concatStrings(left, right.(*valueString)), nil + return concatStrings(left, right.(valueString)), nil case *valueObject: switch right := y.(type) { case *valueObject: @@ -135,7 +135,7 @@ func valueLess(i *interpreter, trace traceElement, x, yv value) (bool, error) { return false, err } return left.value < right.value, nil - case *valueString: + case valueString: right, err := i.getString(yv, trace) if err != nil { return false, err @@ -178,7 +178,7 @@ func builtinLength(i *interpreter, trace traceElement, x value) (value, error) { num = len(objectFields(x, withoutHidden)) case *valueArray: num = len(x.elements) - case *valueString: + case valueString: num = x.length() case *valueFunction: num = len(x.Parameters().required) @@ -190,7 +190,7 @@ func builtinLength(i *interpreter, trace traceElement, x value) (value, error) { func builtinToString(i *interpreter, trace traceElement, x value) (value, error) { switch x := x.(type) { - case *valueString: + case valueString: return x, nil } var buf bytes.Buffer @@ -209,7 +209,7 @@ func builtinTrace(i *interpreter, trace traceElement, x value, y value) (value, filename := trace.loc.FileName line := trace.loc.Begin.Line fmt.Fprintf( - os.Stderr, "TRACE: %s:%d %s\n", filename, line, xStr.getString()) + os.Stderr, "TRACE: %s:%d %s\n", filename, line, xStr.getGoString()) return y, nil } @@ -306,7 +306,7 @@ func joinArrays(i *interpreter, trace traceElement, sep *valueArray, arr *valueA return makeValueArray(result), nil } -func joinStrings(i *interpreter, trace traceElement, sep *valueString, arr *valueArray) (value, error) { +func joinStrings(i *interpreter, trace traceElement, sep valueString, arr *valueArray) (value, error) { result := make([]rune, 0, arr.length()) first := true for _, elem := range arr.elements { @@ -317,17 +317,17 @@ func joinStrings(i *interpreter, trace traceElement, sep *valueString, arr *valu switch v := elemValue.(type) { case *valueNull: continue - case *valueString: + case valueString: if !first { - result = append(result, sep.value...) + result = append(result, sep.getRunes()...) } - result = append(result, v.value...) + result = append(result, v.getRunes()...) default: - return nil, i.typeErrorSpecific(elemValue, &valueString{}, trace) + return nil, i.typeErrorSpecific(elemValue, emptyString(), trace) } first = false } - return &valueString{value: result}, nil + return makeStringFromRunes(result), nil } func builtinJoin(i *interpreter, trace traceElement, sep, arrv value) (value, error) { @@ -336,7 +336,7 @@ func builtinJoin(i *interpreter, trace traceElement, sep, arrv value) (value, er return nil, err } switch sep := sep.(type) { - case *valueString: + case valueString: return joinStrings(i, trace, sep, arr) case *valueArray: return joinArrays(i, trace, sep, arr) @@ -524,7 +524,7 @@ func primitiveEquals(i *interpreter, trace traceElement, x, y value) (value, err return nil, err } return makeValueBoolean(left.value == right.value), nil - case *valueString: + case valueString: right, err := i.getString(y, trace) if err != nil { return nil, err @@ -559,7 +559,7 @@ func rawEquals(i *interpreter, trace traceElement, x, y value) (bool, error) { return false, err } return left.value == right.value, nil - case *valueString: + case valueString: right, err := i.getString(y, trace) if err != nil { return false, err @@ -660,7 +660,7 @@ func builtinMd5(i *interpreter, trace traceElement, x value) (value, error) { if err != nil { return nil, err } - hash := md5.Sum([]byte(string(str.value))) + hash := md5.Sum([]byte(str.getGoString())) return makeValueString(hex.EncodeToString(hash[:])), nil } @@ -669,7 +669,7 @@ func builtinEncodeUTF8(i *interpreter, trace traceElement, x value) (value, erro if err != nil { return nil, err } - s := str.getString() + s := str.getGoString() elems := make([]*cachedThunk, 0, len(s)) // it will be longer if characters fall outside of ASCII for _, c := range []byte(s) { elems = append(elems, readyThunk(makeValueNumber(float64(c)))) @@ -721,7 +721,7 @@ func builtinCodepoint(i *interpreter, trace traceElement, x value) (value, error if str.length() != 1 { return nil, i.Error(fmt.Sprintf("codepoint takes a string of length 1, got length %v", str.length()), trace) } - return makeValueNumber(float64(str.value[0])), nil + return makeValueNumber(float64(str.getRunes()[0])), nil } func makeDoubleCheck(i *interpreter, trace traceElement, x float64) (value, error) { @@ -823,7 +823,7 @@ func builtinObjectHasEx(i *interpreter, trace traceElement, objv value, fnamev v return nil, err } h := withHiddenFromBool(includeHidden.value) - hasField := objectHasField(objectBinding(obj), string(fname.value), h) + hasField := objectHasField(objectBinding(obj), string(fname.getRunes()), h) return makeValueBoolean(hasField), nil } @@ -855,8 +855,8 @@ func builtinSplitLimit(i *interpreter, trace traceElement, strv, cv, maxSplitsV if maxSplits < -1 { return nil, i.Error(fmt.Sprintf("std.splitLimit third parameter should be -1 or non-negative, got %v", maxSplits), trace) } - sStr := str.getString() - sC := c.getString() + sStr := str.getGoString() + sC := c.getGoString() if len(sC) != 1 { return nil, i.Error(fmt.Sprintf("std.splitLimit second parameter should have length 1, got %v", len(sC)), trace) } @@ -889,9 +889,9 @@ func builtinStrReplace(i *interpreter, trace traceElement, strv, fromv, tov valu if err != nil { return nil, err } - sStr := str.getString() - sFrom := from.getString() - sTo := to.getString() + sStr := str.getGoString() + sFrom := from.getGoString() + sTo := to.getGoString() if len(sFrom) == 0 { return nil, i.Error("'from' string must not be zero length.", trace) } @@ -965,7 +965,7 @@ func builtinParseJSON(i *interpreter, trace traceElement, str value) (value, err if err != nil { return nil, err } - s := sval.getString() + s := sval.getGoString() var parsedJSON interface{} err = json.Unmarshal([]byte(s), &parsedJSON) if err != nil { @@ -979,7 +979,7 @@ func builtinExtVar(i *interpreter, trace traceElement, name value) (value, error if err != nil { return nil, err } - index := str.getString() + index := str.getGoString() if pv, ok := i.extVars[index]; ok { return i.evaluatePV(pv, trace) } @@ -991,7 +991,7 @@ func builtinNative(i *interpreter, trace traceElement, name value) (value, error if err != nil { return nil, err } - index := str.getString() + index := str.getGoString() if f, exists := i.nativeFuncs[index]; exists { return &valueFunction{ec: f}, nil } diff --git a/imports.go b/imports.go index 512602d..dc254db 100644 --- a/imports.go +++ b/imports.go @@ -124,7 +124,7 @@ func (cache *importCache) importAST(importedFrom, importedPath string) (ast.Node } // ImportString imports a string, caches it and then returns it. -func (cache *importCache) importString(importedFrom, importedPath string, i *interpreter, trace traceElement) (*valueString, error) { +func (cache *importCache) importString(importedFrom, importedPath string, i *interpreter, trace traceElement) (valueString, error) { data, _, err := cache.importData(importedFrom, importedPath) if err != nil { return nil, i.Error(err.Error(), trace) diff --git a/interpreter.go b/interpreter.go index a278f97..9986eda 100644 --- a/interpreter.go +++ b/interpreter.go @@ -379,8 +379,8 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) { } var fieldName string switch fieldNameValue := fieldNameValue.(type) { - case *valueString: - fieldName = fieldNameValue.getString() + case valueString: + fieldName = fieldNameValue.getGoString() case *valueNull: // Omitted field. continue @@ -424,7 +424,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) { if err != nil { return nil, err } - return nil, i.Error(msg.getString(), trace) + return nil, i.Error(msg.getGoString(), trace) case *ast.Index: targetValue, err := i.evaluate(node.Target, nonTailCall) @@ -441,7 +441,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) { if err != nil { return nil, err } - return target.index(i, trace, indexString.getString()) + return target.index(i, trace, indexString.getGoString()) case *valueArray: indexInt, err := i.getNumber(index, trace) if err != nil { @@ -450,7 +450,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) { // TODO(https://github.com/google/jsonnet/issues/377): non-integer indexes should be an error return target.index(i, trace, int(indexInt.value)) - case *valueString: + case valueString: indexInt, err := i.getNumber(index, trace) if err != nil { return nil, err @@ -517,7 +517,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) { if err != nil { return nil, err } - return objectIndex(i, trace, i.stack.getSelfBinding().super(), indexStr.getString()) + return objectIndex(i, trace, i.stack.getSelfBinding().super(), indexStr.getGoString()) case *ast.InSuper: index, err := i.evaluate(node.Index, nonTailCall) @@ -528,7 +528,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) { if err != nil { return nil, err } - hasField := objectHasField(i.stack.getSelfBinding().super(), indexStr.getString(), withHidden) + hasField := objectHasField(i.stack.getSelfBinding().super(), indexStr.getGoString(), withHidden) return makeValueBoolean(hasField), nil case *ast.Function: @@ -636,8 +636,8 @@ func (i *interpreter) manifestJSON(trace traceElement, v value) (interface{}, er case *valueNumber: return v.value, nil - case *valueString: - return v.getString(), nil + case valueString: + return v.getGoString(), nil case *valueNull: return nil, nil @@ -802,8 +802,8 @@ func (i *interpreter) manifestAndSerializeJSON( // manifestString expects the value to be a string and returns it. func (i *interpreter) manifestString(buf *bytes.Buffer, trace traceElement, v value) error { switch v := v.(type) { - case *valueString: - buf.WriteString(v.getString()) + case valueString: + buf.WriteString(v.getGoString()) return nil default: return makeRuntimeError(fmt.Sprintf("expected string result, got: %s", v.getType().name), i.getCurrentStackTrace(trace)) @@ -1010,16 +1010,16 @@ func (i *interpreter) evaluateInt64(pv potentialValue, trace traceElement) (int6 return i.getInt64(v, trace) } -func (i *interpreter) getString(val value, trace traceElement) (*valueString, error) { +func (i *interpreter) getString(val value, trace traceElement) (valueString, error) { switch v := val.(type) { - case *valueString: + case valueString: return v, nil default: - return nil, i.typeErrorSpecific(val, &valueString{}, trace) + return nil, i.typeErrorSpecific(val, emptyString(), trace) } } -func (i *interpreter) evaluateString(pv potentialValue, trace traceElement) (*valueString, error) { +func (i *interpreter) evaluateString(pv potentialValue, trace traceElement) (valueString, error) { v, err := i.evaluatePV(pv, trace) if err != nil { return nil, err diff --git a/value.go b/value.go index 13764ac..faee5db 100644 --- a/value.go +++ b/value.go @@ -73,73 +73,165 @@ func (v *valueBase) aValue() {} // Primitive values // ------------------------------------- -// valueString represents a string value, internally using a []rune for quick +type valueString interface { + value + length() int + getRunes() []rune + getGoString() string + index(i *interpreter, trace traceElement, index int) (value, error) +} + +// valueFlatString represents a string value, internally using a []rune for quick // indexing. -type valueString struct { +type valueFlatString struct { valueBase // We use rune slices instead of strings for quick indexing value []rune } -func (s *valueString) index(i *interpreter, trace traceElement, index int) (value, error) { +func (s *valueFlatString) index(i *interpreter, trace traceElement, index int) (value, error) { if 0 <= index && index < s.length() { return makeValueString(string(s.value[index])), nil } return nil, i.Error(fmt.Sprintf("Index %d out of bounds, not within [0, %v)", index, s.length()), trace) } -func concatStrings(a, b *valueString) *valueString { - result := make([]rune, 0, len(a.value)+len(b.value)) - for _, r := range a.value { - result = append(result, r) - } - for _, r := range b.value { - result = append(result, r) - } - return &valueString{value: result} +func (s *valueFlatString) getRunes() []rune { + return s.value } -func stringLessThan(a, b *valueString) bool { - var length int - if len(a.value) < len(b.value) { - length = len(a.value) - } else { - length = len(b.value) +func (s *valueFlatString) getGoString() string { + return string(s.value) +} + +func (s *valueFlatString) length() int { + return len(s.value) +} + +func (s *valueFlatString) getType() *valueType { + return stringType +} + +// TODO(sbarzowski) this was chosen on a hunch, be more systematic about it +const extendedStringMinLength = 30 + +// Indirect representation of long strings. +// It consists of two parts, left and right, concatenation of which is the whole string. +type valueStringTree struct { + valueBase + left, right valueString + len int +} + +func buildFullString(s valueString, buf *[]rune) { + switch s := s.(type) { + case *valueFlatString: + *buf = append(*buf, s.value...) + case *valueStringTree: + buildFullString(s.left, buf) + buildFullString(s.right, buf) } - for i := 0; i < length; i++ { - if a.value[i] != b.value[i] { - return a.value[i] < b.value[i] +} + +func (s *valueStringTree) flattenToLeft() { + if s.right != nil { + result := make([]rune, 0, s.len) + buildFullString(s, &result) + s.left = makeStringFromRunes(result) + s.right = nil + } +} + +func (s *valueStringTree) index(i *interpreter, trace traceElement, index int) (value, error) { + if 0 <= index && index < s.len { + s.flattenToLeft() + return s.left.index(i, trace, index) + } + return nil, i.Error(fmt.Sprintf("Index %d out of bounds, not within [0, %v)", index, s.length()), trace) +} + +func (s *valueStringTree) getRunes() []rune { + s.flattenToLeft() + return s.left.getRunes() +} + +func (s *valueStringTree) getGoString() string { + s.flattenToLeft() + return s.left.getGoString() +} + +func (s *valueStringTree) length() int { + return s.len +} + +func (s *valueStringTree) getType() *valueType { + return stringType +} + +func emptyString() *valueFlatString { + return &valueFlatString{} +} + +func makeStringFromRunes(runes []rune) *valueFlatString { + return &valueFlatString{value: runes} +} + +func concatStrings(a, b valueString) valueString { + aLen := a.length() + bLen := b.length() + if aLen == 0 { + return b + } else if bLen == 0 { + return a + } else if aLen+bLen < extendedStringMinLength { + runesA := a.getRunes() + runesB := b.getRunes() + result := make([]rune, 0, aLen+bLen) + result = append(result, runesA...) + result = append(result, runesB...) + return makeStringFromRunes(result) + } else { + return &valueStringTree{ + left: a, + right: b, + len: aLen + bLen, } } - return len(a.value) < len(b.value) } -func stringEqual(a, b *valueString) bool { - if len(a.value) != len(b.value) { +func stringLessThan(a, b valueString) bool { + runesA := a.getRunes() + runesB := b.getRunes() + var length int + if len(runesA) < len(runesB) { + length = len(runesA) + } else { + length = len(runesB) + } + for i := 0; i < length; i++ { + if runesA[i] != runesB[i] { + return runesA[i] < runesB[i] + } + } + return len(runesA) < len(runesB) +} + +func stringEqual(a, b valueString) bool { + runesA := a.getRunes() + runesB := b.getRunes() + if len(runesA) != len(runesB) { return false } - for i := 0; i < len(a.value); i++ { - if a.value[i] != b.value[i] { + for i := 0; i < len(runesA); i++ { + if runesA[i] != runesB[i] { return false } } return true } -func (s *valueString) length() int { - return len(s.value) -} - -func (s *valueString) getString() string { - return string(s.value) -} - -func makeValueString(v string) *valueString { - return &valueString{value: []rune(v)} -} - -func (*valueString) getType() *valueType { - return stringType +func makeValueString(v string) valueString { + return &valueFlatString{value: []rune(v)} } type valueBoolean struct {