From c1743b9a2a21ffcf6948ab738ab4cf4b14a18f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Barzowski?= Date: Thu, 22 Nov 2018 18:37:20 +0100 Subject: [PATCH] Add std.encodeUTF8 and std.decodeUTF8 --- builtins.go | 34 ++++++++++++++++++++++++++++++ testdata/decodeUTF8.golden | 7 +++++++ testdata/decodeUTF8.jsonnet | 7 +++++++ testdata/encodeUTF8.golden | 42 +++++++++++++++++++++++++++++++++++++ testdata/encodeUTF8.jsonnet | 7 +++++++ 5 files changed, 97 insertions(+) create mode 100644 testdata/decodeUTF8.golden create mode 100644 testdata/decodeUTF8.jsonnet create mode 100644 testdata/encodeUTF8.golden create mode 100644 testdata/encodeUTF8.jsonnet diff --git a/builtins.go b/builtins.go index 0bcb1d2..cff418a 100644 --- a/builtins.go +++ b/builtins.go @@ -575,6 +575,38 @@ func builtinMd5(i *interpreter, trace TraceElement, x value) (value, error) { return makeValueString(hex.EncodeToString(hash[:])), nil } +func builtinEncodeUTF8(i *interpreter, trace TraceElement, x value) (value, error) { + str, err := i.getString(x, trace) + if err != nil { + return nil, err + } + s := str.getString() + elems := make([]*cachedThunk, 0, len(s)) // len(s) counts bytes, not characters, so this capacity is exact: one element per byte + for _, c := range []byte(s) { + elems = append(elems, readyThunk(makeValueNumber(float64(c)))) + } + return makeValueArray(elems), nil +} + +func builtinDecodeUTF8(i *interpreter, trace TraceElement, x value) (value, error) { + arr, err := i.getArray(x, trace) + if err != nil { + return nil, err + } + bs := make([]byte, len(arr.elements)) // exactly one byte per input array element, so this length is exact + for pos := range arr.elements { + v, err := i.evaluateInt(arr.elements[pos], trace) + if err != nil { + return nil, err + } + if v < 0 || v > 255 { + return nil, i.Error(fmt.Sprintf("Bytes must be integers in range [0, 255], got %d", v), trace) + } + bs[pos] = byte(v) + } + return makeValueString(string(bs)), nil +} + // Maximum allowed unicode codepoint // 
https://en.wikipedia.org/wiki/Unicode#Architecture_and_terminology const codepointMax = 0x10FFFF @@ -1016,6 +1048,8 @@ var funcBuiltins = buildBuiltinMap([]builtin{ &unaryBuiltin{name: "md5", function: builtinMd5, parameters: ast.Identifiers{"x"}}, &ternaryBuiltin{name: "strReplace", function: builtinStrReplace, parameters: ast.Identifiers{"str", "from", "to"}}, &unaryBuiltin{name: "parseJson", function: builtinParseJSON, parameters: ast.Identifiers{"str"}}, + &unaryBuiltin{name: "encodeUTF8", function: builtinEncodeUTF8, parameters: ast.Identifiers{"str"}}, + &unaryBuiltin{name: "decodeUTF8", function: builtinDecodeUTF8, parameters: ast.Identifiers{"arr"}}, &unaryBuiltin{name: "native", function: builtinNative, parameters: ast.Identifiers{"x"}}, // internal diff --git a/testdata/decodeUTF8.golden b/testdata/decodeUTF8.golden new file mode 100644 index 0000000..02d2b03 --- /dev/null +++ b/testdata/decodeUTF8.golden @@ -0,0 +1,7 @@ +[ + "", + "A", + "AAA", + "§", + "zażółć geślą jaźń" +] diff --git a/testdata/decodeUTF8.jsonnet b/testdata/decodeUTF8.jsonnet new file mode 100644 index 0000000..fa34866 --- /dev/null +++ b/testdata/decodeUTF8.jsonnet @@ -0,0 +1,7 @@ +[ + std.decodeUTF8([]), + std.decodeUTF8([65]), + std.decodeUTF8([65,65,65]), + std.decodeUTF8([194,167]), + std.decodeUTF8([122,97,197,188,195,179,197,130,196,135,32,103,101,197,155,108,196,133,32,106,97,197,186,197,132]), +] \ No newline at end of file diff --git a/testdata/encodeUTF8.golden b/testdata/encodeUTF8.golden new file mode 100644 index 0000000..33b169d --- /dev/null +++ b/testdata/encodeUTF8.golden @@ -0,0 +1,42 @@ +[ + [ ], + [ + 65 + ], + [ + 65, + 65, + 65 + ], + [ + 194, + 167 + ], + [ + 122, + 97, + 197, + 188, + 195, + 179, + 197, + 130, + 196, + 135, + 32, + 103, + 101, + 197, + 155, + 108, + 196, + 133, + 32, + 106, + 97, + 197, + 186, + 197, + 132 + ] +] diff --git a/testdata/encodeUTF8.jsonnet b/testdata/encodeUTF8.jsonnet new file mode 100644 index 0000000..f42a334 --- /dev/null +++ 
b/testdata/encodeUTF8.jsonnet @@ -0,0 +1,7 @@ +[ + std.encodeUTF8(''), + std.encodeUTF8('A'), + std.encodeUTF8('AAA'), + std.encodeUTF8('§'), + std.encodeUTF8('zażółć geślą jaźń'), +] \ No newline at end of file