From b2c2146d7c3d2f32b273c466dfa792d2b3bf56b2 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Wed, 30 Apr 2025 10:53:48 +0100 Subject: [PATCH] Labels: simpler/faster stringlabels encoding (#16069) Instead of using varint to encode the size of each label, use a single byte for size 0-254, or a flag value of 255 followed by the size in 3 bytes little-endian. This reduces the amount of code, and also the number of branches in commonly-executed code, so it runs faster. The maximum allowed label name or value length is now 2^24 or 16MB. Memory used by labels changes as follows: * Labels from 0 to 127 bytes length: same * From 128 to 254: 1 byte less * From 255 to 16383: 2 bytes more * From 16384 to 2MB: 1 byte more * From 2MB to 16MB: same Labels: panic on string too long. Slightly more user-friendly than encoding bad data and finding out when we decode. Clarify that Labels.Bytes() encoding can change --------- Signed-off-by: Bryan Boreham --- model/labels/labels.go | 4 +- model/labels/labels_dedupelabels.go | 4 +- model/labels/labels_stringlabels.go | 75 +++++++++-------------------- model/labels/labels_test.go | 10 ++++ 4 files changed, 38 insertions(+), 55 deletions(-) diff --git a/model/labels/labels.go b/model/labels/labels.go index ed66d73cbf..d148490edb 100644 --- a/model/labels/labels.go +++ b/model/labels/labels.go @@ -32,8 +32,8 @@ func (ls Labels) Len() int { return len(ls) } func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] } func (ls Labels) Less(i, j int) bool { return ls[i].Name < ls[j].Name } -// Bytes returns ls as a byte slice. -// It uses an byte invalid character as a separator and so should not be used for printing. +// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key. +// Encoding may change over time or between runs of Prometheus. 
func (ls Labels) Bytes(buf []byte) []byte { b := bytes.NewBuffer(buf[:0]) b.WriteByte(labelSep) diff --git a/model/labels/labels_dedupelabels.go b/model/labels/labels_dedupelabels.go index a0d83e0044..38cf91301c 100644 --- a/model/labels/labels_dedupelabels.go +++ b/model/labels/labels_dedupelabels.go @@ -140,8 +140,8 @@ func decodeString(t *nameTable, data string, index int) (string, int) { return t.ToName(num), index } -// Bytes returns ls as a byte slice. -// It uses non-printing characters and so should not be used for printing. +// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key. +// Encoding may change over time or between runs of Prometheus. func (ls Labels) Bytes(buf []byte) []byte { b := bytes.NewBuffer(buf[:0]) for i := 0; i < len(ls.data); { diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index f49ed96f65..dfb374f8dd 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -24,31 +24,25 @@ import ( ) // Labels is implemented by a single flat string holding name/value pairs. -// Each name and value is preceded by its length in varint encoding. +// Each name and value is preceded by its length, encoded as a single byte +// for sizes 0-254, or as the flag byte 255 followed by the size in 3 bytes little-endian. +// Maximum encodable length is 2^24-1 bytes, just under 16MB. // Names are in order. type Labels struct { data string } func decodeSize(data string, index int) (int, int) { - // Fast-path for common case of a single byte, value 0..127. b := data[index] index++ - if b < 0x80 { - return int(b), index - } - size := int(b & 0x7F) - for shift := uint(7); ; shift += 7 { + if b == 255 { + // Larger numbers are encoded as 3 bytes little-endian. // Just panic if we go of the end of data, since all Labels strings are constructed internally and // malformed data indicates a bug, or memory corruption.
- b := data[index] - index++ - size |= int(b&0x7F) << shift - if b < 0x80 { - break - } + return int(data[index]) + (int(data[index+1]) << 8) + (int(data[index+2]) << 16), index + 3 } - return size, index + // More common case of a single byte, value 0..254. + return int(b), index } func decodeString(data string, index int) (string, int) { @@ -57,8 +51,8 @@ func decodeString(data string, index int) (string, int) { return data[index : index+size], index + size } -// Bytes returns ls as a byte slice. -// It uses non-printing characters and so should not be used for printing. +// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key. +// Encoding may change over time or between runs of Prometheus. func (ls Labels) Bytes(buf []byte) []byte { if cap(buf) < len(ls.data) { buf = make([]byte, len(ls.data)) @@ -527,48 +521,27 @@ func marshalLabelToSizedBuffer(m *Label, data []byte) int { return len(data) - i } -func sizeVarint(x uint64) (n int) { - // Most common case first - if x < 1<<7 { +func sizeWhenEncoded(x uint64) (n int) { + if x < 255 { return 1 + } else if x <= 1<<24 { + return 4 } - if x >= 1<<56 { - return 9 - } - if x >= 1<<28 { - x >>= 28 - n = 4 - } - if x >= 1<<14 { - x >>= 14 - n += 2 - } - if x >= 1<<7 { - n++ - } - return n + 1 + panic("String too long to encode as label.") } -func encodeVarint(data []byte, offset int, v uint64) int { - offset -= sizeVarint(v) - base := offset - for v >= 1<<7 { - data[offset] = uint8(v&0x7f | 0x80) - v >>= 7 - offset++ - } - data[offset] = uint8(v) - return base -} - -// Special code for the common case that a size is less than 128 func encodeSize(data []byte, offset, v int) int { - if v < 1<<7 { + if v < 255 { offset-- data[offset] = uint8(v) return offset } - return encodeVarint(data, offset, uint64(v)) + offset -= 4 + data[offset] = 255 + data[offset+1] = byte(v) + data[offset+2] = byte((v >> 8)) + data[offset+3] = byte((v >> 16)) + return offset } func labelsSize(lbls []Label) (n int) { @@ 
-582,9 +555,9 @@ func labelsSize(lbls []Label) (n int) { func labelSize(m *Label) (n int) { // strings are encoded as length followed by contents. l := len(m.Name) - n += l + sizeVarint(uint64(l)) + n += l + sizeWhenEncoded(uint64(l)) l = len(m.Value) - n += l + sizeVarint(uint64(l)) + n += l + sizeWhenEncoded(uint64(l)) return n } diff --git a/model/labels/labels_test.go b/model/labels/labels_test.go index a2a7734326..b7ba71b553 100644 --- a/model/labels/labels_test.go +++ b/model/labels/labels_test.go @@ -27,6 +27,8 @@ import ( ) func TestLabels_String(t *testing.T) { + s254 := strings.Repeat("x", 254) // Edge cases for stringlabels encoding. + s255 := strings.Repeat("x", 255) cases := []struct { labels Labels expected string @@ -43,6 +45,14 @@ func TestLabels_String(t *testing.T) { labels: FromStrings("service.name", "t1", "whatever\\whatever", "t2"), expected: `{"service.name"="t1", "whatever\\whatever"="t2"}`, }, + { + labels: FromStrings("aaa", "111", "xx", s254), + expected: `{aaa="111", xx="` + s254 + `"}`, + }, + { + labels: FromStrings("aaa", "111", "xx", s255), + expected: `{aaa="111", xx="` + s255 + `"}`, + }, } for _, c := range cases { str := c.labels.String()