Adaptive string representation

When adding long strings, don't copy them immediately. Instead
build long strings only when their contents are requested.

This allows to build a long string from parts, using a regular
operator+ in linear time. This lets users to worry much less
about using std.join etc.

If indexing the string is mixed with building it using operator+
the behavior can still be quadratic. We may want to address it in
a later change.
This commit is contained in:
Stanisław Barzowski 2019-02-26 11:45:53 +01:00
parent b44e63e102
commit fe82a78401
4 changed files with 177 additions and 85 deletions

View File

@ -33,12 +33,12 @@ import (
func builtinPlus(i *interpreter, trace traceElement, x, y value) (value, error) {
// TODO(sbarzowski) perhaps a more elegant way to dispatch
switch right := y.(type) {
case *valueString:
case valueString:
left, err := builtinToString(i, trace, x)
if err != nil {
return nil, err
}
return concatStrings(left.(*valueString), right), nil
return concatStrings(left.(valueString), right), nil
}
switch left := x.(type) {
@ -48,12 +48,12 @@ func builtinPlus(i *interpreter, trace traceElement, x, y value) (value, error)
return nil, err
}
return makeDoubleCheck(i, trace, left.value+right.value)
case *valueString:
case valueString:
right, err := builtinToString(i, trace, y)
if err != nil {
return nil, err
}
return concatStrings(left, right.(*valueString)), nil
return concatStrings(left, right.(valueString)), nil
case *valueObject:
switch right := y.(type) {
case *valueObject:
@ -135,7 +135,7 @@ func valueLess(i *interpreter, trace traceElement, x, yv value) (bool, error) {
return false, err
}
return left.value < right.value, nil
case *valueString:
case valueString:
right, err := i.getString(yv, trace)
if err != nil {
return false, err
@ -178,7 +178,7 @@ func builtinLength(i *interpreter, trace traceElement, x value) (value, error) {
num = len(objectFields(x, withoutHidden))
case *valueArray:
num = len(x.elements)
case *valueString:
case valueString:
num = x.length()
case *valueFunction:
num = len(x.Parameters().required)
@ -190,7 +190,7 @@ func builtinLength(i *interpreter, trace traceElement, x value) (value, error) {
func builtinToString(i *interpreter, trace traceElement, x value) (value, error) {
switch x := x.(type) {
case *valueString:
case valueString:
return x, nil
}
var buf bytes.Buffer
@ -209,7 +209,7 @@ func builtinTrace(i *interpreter, trace traceElement, x value, y value) (value,
filename := trace.loc.FileName
line := trace.loc.Begin.Line
fmt.Fprintf(
os.Stderr, "TRACE: %s:%d %s\n", filename, line, xStr.getString())
os.Stderr, "TRACE: %s:%d %s\n", filename, line, xStr.getGoString())
return y, nil
}
@ -306,7 +306,7 @@ func joinArrays(i *interpreter, trace traceElement, sep *valueArray, arr *valueA
return makeValueArray(result), nil
}
func joinStrings(i *interpreter, trace traceElement, sep *valueString, arr *valueArray) (value, error) {
func joinStrings(i *interpreter, trace traceElement, sep valueString, arr *valueArray) (value, error) {
result := make([]rune, 0, arr.length())
first := true
for _, elem := range arr.elements {
@ -317,17 +317,17 @@ func joinStrings(i *interpreter, trace traceElement, sep *valueString, arr *valu
switch v := elemValue.(type) {
case *valueNull:
continue
case *valueString:
case valueString:
if !first {
result = append(result, sep.value...)
result = append(result, sep.getRunes()...)
}
result = append(result, v.value...)
result = append(result, v.getRunes()...)
default:
return nil, i.typeErrorSpecific(elemValue, &valueString{}, trace)
return nil, i.typeErrorSpecific(elemValue, emptyString(), trace)
}
first = false
}
return &valueString{value: result}, nil
return makeStringFromRunes(result), nil
}
func builtinJoin(i *interpreter, trace traceElement, sep, arrv value) (value, error) {
@ -336,7 +336,7 @@ func builtinJoin(i *interpreter, trace traceElement, sep, arrv value) (value, er
return nil, err
}
switch sep := sep.(type) {
case *valueString:
case valueString:
return joinStrings(i, trace, sep, arr)
case *valueArray:
return joinArrays(i, trace, sep, arr)
@ -524,7 +524,7 @@ func primitiveEquals(i *interpreter, trace traceElement, x, y value) (value, err
return nil, err
}
return makeValueBoolean(left.value == right.value), nil
case *valueString:
case valueString:
right, err := i.getString(y, trace)
if err != nil {
return nil, err
@ -559,7 +559,7 @@ func rawEquals(i *interpreter, trace traceElement, x, y value) (bool, error) {
return false, err
}
return left.value == right.value, nil
case *valueString:
case valueString:
right, err := i.getString(y, trace)
if err != nil {
return false, err
@ -660,7 +660,7 @@ func builtinMd5(i *interpreter, trace traceElement, x value) (value, error) {
if err != nil {
return nil, err
}
hash := md5.Sum([]byte(string(str.value)))
hash := md5.Sum([]byte(str.getGoString()))
return makeValueString(hex.EncodeToString(hash[:])), nil
}
@ -669,7 +669,7 @@ func builtinEncodeUTF8(i *interpreter, trace traceElement, x value) (value, erro
if err != nil {
return nil, err
}
s := str.getString()
s := str.getGoString()
elems := make([]*cachedThunk, 0, len(s)) // it will be longer if characters fall outside of ASCII
for _, c := range []byte(s) {
elems = append(elems, readyThunk(makeValueNumber(float64(c))))
@ -721,7 +721,7 @@ func builtinCodepoint(i *interpreter, trace traceElement, x value) (value, error
if str.length() != 1 {
return nil, i.Error(fmt.Sprintf("codepoint takes a string of length 1, got length %v", str.length()), trace)
}
return makeValueNumber(float64(str.value[0])), nil
return makeValueNumber(float64(str.getRunes()[0])), nil
}
func makeDoubleCheck(i *interpreter, trace traceElement, x float64) (value, error) {
@ -823,7 +823,7 @@ func builtinObjectHasEx(i *interpreter, trace traceElement, objv value, fnamev v
return nil, err
}
h := withHiddenFromBool(includeHidden.value)
hasField := objectHasField(objectBinding(obj), string(fname.value), h)
hasField := objectHasField(objectBinding(obj), string(fname.getRunes()), h)
return makeValueBoolean(hasField), nil
}
@ -855,8 +855,8 @@ func builtinSplitLimit(i *interpreter, trace traceElement, strv, cv, maxSplitsV
if maxSplits < -1 {
return nil, i.Error(fmt.Sprintf("std.splitLimit third parameter should be -1 or non-negative, got %v", maxSplits), trace)
}
sStr := str.getString()
sC := c.getString()
sStr := str.getGoString()
sC := c.getGoString()
if len(sC) != 1 {
return nil, i.Error(fmt.Sprintf("std.splitLimit second parameter should have length 1, got %v", len(sC)), trace)
}
@ -889,9 +889,9 @@ func builtinStrReplace(i *interpreter, trace traceElement, strv, fromv, tov valu
if err != nil {
return nil, err
}
sStr := str.getString()
sFrom := from.getString()
sTo := to.getString()
sStr := str.getGoString()
sFrom := from.getGoString()
sTo := to.getGoString()
if len(sFrom) == 0 {
return nil, i.Error("'from' string must not be zero length.", trace)
}
@ -965,7 +965,7 @@ func builtinParseJSON(i *interpreter, trace traceElement, str value) (value, err
if err != nil {
return nil, err
}
s := sval.getString()
s := sval.getGoString()
var parsedJSON interface{}
err = json.Unmarshal([]byte(s), &parsedJSON)
if err != nil {
@ -979,7 +979,7 @@ func builtinExtVar(i *interpreter, trace traceElement, name value) (value, error
if err != nil {
return nil, err
}
index := str.getString()
index := str.getGoString()
if pv, ok := i.extVars[index]; ok {
return i.evaluatePV(pv, trace)
}
@ -991,7 +991,7 @@ func builtinNative(i *interpreter, trace traceElement, name value) (value, error
if err != nil {
return nil, err
}
index := str.getString()
index := str.getGoString()
if f, exists := i.nativeFuncs[index]; exists {
return &valueFunction{ec: f}, nil
}

View File

@ -124,7 +124,7 @@ func (cache *importCache) importAST(importedFrom, importedPath string) (ast.Node
}
// ImportString imports a string, caches it and then returns it.
func (cache *importCache) importString(importedFrom, importedPath string, i *interpreter, trace traceElement) (*valueString, error) {
func (cache *importCache) importString(importedFrom, importedPath string, i *interpreter, trace traceElement) (valueString, error) {
data, _, err := cache.importData(importedFrom, importedPath)
if err != nil {
return nil, i.Error(err.Error(), trace)

View File

@ -379,8 +379,8 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) {
}
var fieldName string
switch fieldNameValue := fieldNameValue.(type) {
case *valueString:
fieldName = fieldNameValue.getString()
case valueString:
fieldName = fieldNameValue.getGoString()
case *valueNull:
// Omitted field.
continue
@ -424,7 +424,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) {
if err != nil {
return nil, err
}
return nil, i.Error(msg.getString(), trace)
return nil, i.Error(msg.getGoString(), trace)
case *ast.Index:
targetValue, err := i.evaluate(node.Target, nonTailCall)
@ -441,7 +441,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) {
if err != nil {
return nil, err
}
return target.index(i, trace, indexString.getString())
return target.index(i, trace, indexString.getGoString())
case *valueArray:
indexInt, err := i.getNumber(index, trace)
if err != nil {
@ -450,7 +450,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) {
// TODO(https://github.com/google/jsonnet/issues/377): non-integer indexes should be an error
return target.index(i, trace, int(indexInt.value))
case *valueString:
case valueString:
indexInt, err := i.getNumber(index, trace)
if err != nil {
return nil, err
@ -517,7 +517,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) {
if err != nil {
return nil, err
}
return objectIndex(i, trace, i.stack.getSelfBinding().super(), indexStr.getString())
return objectIndex(i, trace, i.stack.getSelfBinding().super(), indexStr.getGoString())
case *ast.InSuper:
index, err := i.evaluate(node.Index, nonTailCall)
@ -528,7 +528,7 @@ func (i *interpreter) evaluate(a ast.Node, tc tailCallStatus) (value, error) {
if err != nil {
return nil, err
}
hasField := objectHasField(i.stack.getSelfBinding().super(), indexStr.getString(), withHidden)
hasField := objectHasField(i.stack.getSelfBinding().super(), indexStr.getGoString(), withHidden)
return makeValueBoolean(hasField), nil
case *ast.Function:
@ -636,8 +636,8 @@ func (i *interpreter) manifestJSON(trace traceElement, v value) (interface{}, er
case *valueNumber:
return v.value, nil
case *valueString:
return v.getString(), nil
case valueString:
return v.getGoString(), nil
case *valueNull:
return nil, nil
@ -802,8 +802,8 @@ func (i *interpreter) manifestAndSerializeJSON(
// manifestString expects the value to be a string and returns it.
func (i *interpreter) manifestString(buf *bytes.Buffer, trace traceElement, v value) error {
switch v := v.(type) {
case *valueString:
buf.WriteString(v.getString())
case valueString:
buf.WriteString(v.getGoString())
return nil
default:
return makeRuntimeError(fmt.Sprintf("expected string result, got: %s", v.getType().name), i.getCurrentStackTrace(trace))
@ -1010,16 +1010,16 @@ func (i *interpreter) evaluateInt64(pv potentialValue, trace traceElement) (int6
return i.getInt64(v, trace)
}
func (i *interpreter) getString(val value, trace traceElement) (*valueString, error) {
func (i *interpreter) getString(val value, trace traceElement) (valueString, error) {
switch v := val.(type) {
case *valueString:
case valueString:
return v, nil
default:
return nil, i.typeErrorSpecific(val, &valueString{}, trace)
return nil, i.typeErrorSpecific(val, emptyString(), trace)
}
}
func (i *interpreter) evaluateString(pv potentialValue, trace traceElement) (*valueString, error) {
func (i *interpreter) evaluateString(pv potentialValue, trace traceElement) (valueString, error) {
v, err := i.evaluatePV(pv, trace)
if err != nil {
return nil, err

172
value.go
View File

@ -73,73 +73,165 @@ func (v *valueBase) aValue() {}
// Primitive values
// -------------------------------------
// valueString represents a string value, internally using a []rune for quick
type valueString interface {
value
length() int
getRunes() []rune
getGoString() string
index(i *interpreter, trace traceElement, index int) (value, error)
}
// valueFlatString represents a string value, internally using a []rune for quick
// indexing.
type valueString struct {
type valueFlatString struct {
valueBase
// We use rune slices instead of strings for quick indexing
value []rune
}
func (s *valueString) index(i *interpreter, trace traceElement, index int) (value, error) {
func (s *valueFlatString) index(i *interpreter, trace traceElement, index int) (value, error) {
if 0 <= index && index < s.length() {
return makeValueString(string(s.value[index])), nil
}
return nil, i.Error(fmt.Sprintf("Index %d out of bounds, not within [0, %v)", index, s.length()), trace)
}
func concatStrings(a, b *valueString) *valueString {
result := make([]rune, 0, len(a.value)+len(b.value))
for _, r := range a.value {
result = append(result, r)
}
for _, r := range b.value {
result = append(result, r)
}
return &valueString{value: result}
func (s *valueFlatString) getRunes() []rune {
return s.value
}
func stringLessThan(a, b *valueString) bool {
var length int
if len(a.value) < len(b.value) {
length = len(a.value)
} else {
length = len(b.value)
func (s *valueFlatString) getGoString() string {
return string(s.value)
}
func (s *valueFlatString) length() int {
return len(s.value)
}
func (s *valueFlatString) getType() *valueType {
return stringType
}
// TODO(sbarzowski) this was chosen on a hunch, be more systematic about it
const extendedStringMinLength = 30
// Indirect representation of long strings.
// It consists of two parts, left and right, concatenation of which is the whole string.
type valueStringTree struct {
valueBase
left, right valueString
len int
}
func buildFullString(s valueString, buf *[]rune) {
switch s := s.(type) {
case *valueFlatString:
*buf = append(*buf, s.value...)
case *valueStringTree:
buildFullString(s.left, buf)
buildFullString(s.right, buf)
}
for i := 0; i < length; i++ {
if a.value[i] != b.value[i] {
return a.value[i] < b.value[i]
}
func (s *valueStringTree) flattenToLeft() {
if s.right != nil {
result := make([]rune, 0, s.len)
buildFullString(s, &result)
s.left = makeStringFromRunes(result)
s.right = nil
}
}
func (s *valueStringTree) index(i *interpreter, trace traceElement, index int) (value, error) {
if 0 <= index && index < s.len {
s.flattenToLeft()
return s.left.index(i, trace, index)
}
return nil, i.Error(fmt.Sprintf("Index %d out of bounds, not within [0, %v)", index, s.length()), trace)
}
func (s *valueStringTree) getRunes() []rune {
s.flattenToLeft()
return s.left.getRunes()
}
func (s *valueStringTree) getGoString() string {
s.flattenToLeft()
return s.left.getGoString()
}
func (s *valueStringTree) length() int {
return s.len
}
func (s *valueStringTree) getType() *valueType {
return stringType
}
func emptyString() *valueFlatString {
return &valueFlatString{}
}
func makeStringFromRunes(runes []rune) *valueFlatString {
return &valueFlatString{value: runes}
}
func concatStrings(a, b valueString) valueString {
aLen := a.length()
bLen := b.length()
if aLen == 0 {
return b
} else if bLen == 0 {
return a
} else if aLen+bLen < extendedStringMinLength {
runesA := a.getRunes()
runesB := b.getRunes()
result := make([]rune, 0, aLen+bLen)
result = append(result, runesA...)
result = append(result, runesB...)
return makeStringFromRunes(result)
} else {
return &valueStringTree{
left: a,
right: b,
len: aLen + bLen,
}
}
return len(a.value) < len(b.value)
}
func stringEqual(a, b *valueString) bool {
if len(a.value) != len(b.value) {
func stringLessThan(a, b valueString) bool {
runesA := a.getRunes()
runesB := b.getRunes()
var length int
if len(runesA) < len(runesB) {
length = len(runesA)
} else {
length = len(runesB)
}
for i := 0; i < length; i++ {
if runesA[i] != runesB[i] {
return runesA[i] < runesB[i]
}
}
return len(runesA) < len(runesB)
}
func stringEqual(a, b valueString) bool {
runesA := a.getRunes()
runesB := b.getRunes()
if len(runesA) != len(runesB) {
return false
}
for i := 0; i < len(a.value); i++ {
if a.value[i] != b.value[i] {
for i := 0; i < len(runesA); i++ {
if runesA[i] != runesB[i] {
return false
}
}
return true
}
func (s *valueString) length() int {
return len(s.value)
}
func (s *valueString) getString() string {
return string(s.value)
}
func makeValueString(v string) *valueString {
return &valueString{value: []rune(v)}
}
func (*valueString) getType() *valueType {
return stringType
func makeValueString(v string) valueString {
return &valueFlatString{value: []rune(v)}
}
type valueBoolean struct {