Labels: simpler/faster stringlabels encoding (#16069)

Instead of using varint to encode the size of each label, use a single
byte for size 0-254, or a flag value of 255 followed by the size in
3 bytes little-endian.

This reduces the amount of code, and also the number of branches in
commonly-executed code, so it runs faster.

The maximum allowed label name or value length is now 2^24-1 bytes (just under 16MB), because the length is stored in 3 bytes.

Memory used by labels changes as follows:
* Labels from 0 to 127 bytes length: same
* From 128 to 254: 1 byte less
* From 255 to 16383: 2 bytes more
* From 16384 to 2MB: 1 byte more
* From 2MB to 16MB: same

Labels: panic on string too long.

Slightly more user-friendly than encoding bad data and finding out when
we decode.

Clarify that Labels.Bytes() encoding can change

---------

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2025-04-30 10:53:48 +01:00 committed by GitHub
parent 7789ef27c8
commit b2c2146d7c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 38 additions and 55 deletions

View File

@ -32,8 +32,8 @@ func (ls Labels) Len() int { return len(ls) }
// Swap exchanges the labels at positions i and j.
func (ls Labels) Swap(i, j int) {
	ls[j], ls[i] = ls[i], ls[j]
}
// Less reports whether the name of the label at position i orders before
// the name of the label at position j.
func (ls Labels) Less(i, j int) bool {
	return ls[j].Name > ls[i].Name
}
// Bytes returns ls as a byte slice.
// It uses an byte invalid character as a separator and so should not be used for printing.
// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
// Encoding may change over time or between runs of Prometheus.
func (ls Labels) Bytes(buf []byte) []byte {
b := bytes.NewBuffer(buf[:0])
b.WriteByte(labelSep)

View File

@ -140,8 +140,8 @@ func decodeString(t *nameTable, data string, index int) (string, int) {
return t.ToName(num), index
}
// Bytes returns ls as a byte slice.
// It uses non-printing characters and so should not be used for printing.
// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
// Encoding may change over time or between runs of Prometheus.
func (ls Labels) Bytes(buf []byte) []byte {
b := bytes.NewBuffer(buf[:0])
for i := 0; i < len(ls.data); {

View File

@ -24,31 +24,25 @@ import (
)
// Labels is implemented by a single flat string holding name/value pairs.
// Each name and value is preceded by its length: a single byte for lengths
// 0-254, or a byte of 255 followed by the length in 3 bytes little-endian.
// (The length prefix was previously a varint; it no longer is.)
// Three bytes can hold at most 2^24-1, so a name or value must be shorter than 16MB.
// Names are in order.
type Labels struct {
// data is the flat encoded string described above.
data string
}
// decodeSize reads the length prefix that starts at data[index].
// A first byte of 0..254 is the length itself; a first byte of 255 is a flag
// meaning the length is held in the following 3 bytes, little-endian.
// It returns the decoded length and the index of the first byte after the prefix.
func decodeSize(data string, index int) (int, int) {
	b := data[index]
	index++
	if b == 255 {
		// Larger numbers are encoded as 3 bytes little-endian.
		// Just panic if we go off the end of data, since all Labels strings are constructed internally and
		// malformed data indicates a bug, or memory corruption.
		return int(data[index]) + (int(data[index+1]) << 8) + (int(data[index+2]) << 16), index + 3
	}
	// More common case of a single byte, value 0..254.
	return int(b), index
}
func decodeString(data string, index int) (string, int) {
@ -57,8 +51,8 @@ func decodeString(data string, index int) (string, int) {
return data[index : index+size], index + size
}
// Bytes returns ls as a byte slice.
// It uses non-printing characters and so should not be used for printing.
// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
// Encoding may change over time or between runs of Prometheus.
func (ls Labels) Bytes(buf []byte) []byte {
if cap(buf) < len(ls.data) {
buf = make([]byte, len(ls.data))
@ -527,48 +521,27 @@ func marshalLabelToSizedBuffer(m *Label, data []byte) int {
return len(data) - i
}
// sizeVarint returns the size in bytes of x in the 7-bits-per-byte varint
// form used by the previous length prefix. It reports 9 for any x of 1<<56
// or more. It is kept so that encodeVarint continues to resolve.
func sizeVarint(x uint64) (n int) {
	// Most common case first
	if x < 1<<7 {
		return 1
	}
	if x >= 1<<56 {
		return 9
	}
	if x >= 1<<28 {
		x >>= 28
		n = 4
	}
	if x >= 1<<14 {
		x >>= 14
		n += 2
	}
	if x >= 1<<7 {
		n++
	}
	return n + 1
}

// sizeWhenEncoded returns the number of bytes the length prefix for a string
// of length x occupies: 1 byte for lengths 0..254, otherwise 4 bytes (the
// 255 flag plus 3 little-endian length bytes).
// It panics when x does not fit in 3 bytes, i.e. when x is 1<<24 or more;
// a length of exactly 1<<24 would otherwise be written as 0.
func sizeWhenEncoded(x uint64) (n int) {
	if x < 255 {
		return 1
	}
	if x < 1<<24 {
		return 4
	}
	panic("String too long to encode as label.")
}
// encodeVarint writes v as a varint (7 bits per byte, low bits first, high
// bit set on every byte except the last) so that it ends just before
// data[offset]. It returns the index of the first byte written.
func encodeVarint(data []byte, offset int, v uint64) int {
	start := offset - sizeVarint(v)
	pos := start
	for ; v >= 0x80; v >>= 7 {
		// Low 7 bits of v with the continuation bit set.
		data[pos] = byte(v) | 0x80
		pos++
	}
	data[pos] = byte(v)
	return start
}
// encodeSize writes the length v so that it ends just before data[offset],
// and returns the index of the first byte written.
// Lengths 0..254 take a single byte; larger lengths take 4 bytes: the flag
// value 255 followed by the length in 3 bytes little-endian.
// It panics if v does not fit in 3 bytes, rather than writing a truncated length.
func encodeSize(data []byte, offset, v int) int {
	// Special code for the common case that a size is less than 255.
	if v < 255 {
		offset--
		data[offset] = uint8(v)
		return offset
	}
	// Only the low 24 bits are stored below, so a bigger length would decode
	// as a different, smaller value.
	if v >= 1<<24 {
		panic("String too long to encode as label.")
	}
	offset -= 4
	data[offset] = 255
	data[offset+1] = byte(v)
	data[offset+2] = byte(v >> 8)
	data[offset+3] = byte(v >> 16)
	return offset
}
func labelsSize(lbls []Label) (n int) {
@ -582,9 +555,9 @@ func labelsSize(lbls []Label) (n int) {
// labelSize returns the number of bytes m takes up when encoded: the bytes
// of its name and of its value, plus one length prefix for each.
func labelSize(m *Label) (n int) {
	// strings are encoded as length followed by contents.
	l := len(m.Name)
	n += l + sizeWhenEncoded(uint64(l))
	l = len(m.Value)
	n += l + sizeWhenEncoded(uint64(l))
	return n
}

View File

@ -27,6 +27,8 @@ import (
)
func TestLabels_String(t *testing.T) {
s254 := strings.Repeat("x", 254) // Edge cases for stringlabels encoding.
s255 := strings.Repeat("x", 255)
cases := []struct {
labels Labels
expected string
@ -43,6 +45,14 @@ func TestLabels_String(t *testing.T) {
labels: FromStrings("service.name", "t1", "whatever\\whatever", "t2"),
expected: `{"service.name"="t1", "whatever\\whatever"="t2"}`,
},
{
labels: FromStrings("aaa", "111", "xx", s254),
expected: `{aaa="111", xx="` + s254 + `"}`,
},
{
labels: FromStrings("aaa", "111", "xx", s255),
expected: `{aaa="111", xx="` + s255 + `"}`,
},
}
for _, c := range cases {
str := c.labels.String()