Labels: simpler/faster stringlabels encoding (#16069)

Instead of using varint to encode the size of each label, use a single
byte for size 0-254, or a flag value of 255 followed by the size in
3 bytes little-endian.

This reduces the amount of code, and also the number of branches in
commonly-executed code, so it runs faster.

The maximum allowed label name or value length is now 2^24-1 bytes, just under 16MB.

Memory used by labels changes as follows:
* Labels from 0 to 127 bytes length: same
* From 128 to 254: 1 byte less
* From 255 to 16383: 2 bytes more
* From 16384 to 2MB: 1 byte more
* From 2MB to 16MB: same

Labels: panic on string too long.

Slightly more user-friendly than encoding bad data and finding out when
we decode.

Clarify that Labels.Bytes() encoding can change

---------

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2025-04-30 10:53:48 +01:00 committed by GitHub
parent 7789ef27c8
commit b2c2146d7c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 38 additions and 55 deletions

View File

@ -32,8 +32,8 @@ func (ls Labels) Len() int { return len(ls) }
func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] } func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] }
func (ls Labels) Less(i, j int) bool { return ls[i].Name < ls[j].Name } func (ls Labels) Less(i, j int) bool { return ls[i].Name < ls[j].Name }
// Bytes returns ls as a byte slice. // Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
// It uses an byte invalid character as a separator and so should not be used for printing. // Encoding may change over time or between runs of Prometheus.
func (ls Labels) Bytes(buf []byte) []byte { func (ls Labels) Bytes(buf []byte) []byte {
b := bytes.NewBuffer(buf[:0]) b := bytes.NewBuffer(buf[:0])
b.WriteByte(labelSep) b.WriteByte(labelSep)

View File

@ -140,8 +140,8 @@ func decodeString(t *nameTable, data string, index int) (string, int) {
return t.ToName(num), index return t.ToName(num), index
} }
// Bytes returns ls as a byte slice. // Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
// It uses non-printing characters and so should not be used for printing. // Encoding may change over time or between runs of Prometheus.
func (ls Labels) Bytes(buf []byte) []byte { func (ls Labels) Bytes(buf []byte) []byte {
b := bytes.NewBuffer(buf[:0]) b := bytes.NewBuffer(buf[:0])
for i := 0; i < len(ls.data); { for i := 0; i < len(ls.data); {

View File

@ -24,31 +24,25 @@ import (
) )
// Labels is implemented by a single flat string holding name/value pairs. // Labels is implemented by a single flat string holding name/value pairs.
// Each name and value is preceded by its length in varint encoding. // Each name and value is preceded by its length, encoded as a single byte
// for size 0-254, or as the flag byte 255 followed by the size in 3 bytes little-endian.
// Maximum length allowed is 2^24-1, just under 16MB.
// Names are in order. // Names are in order.
type Labels struct { type Labels struct {
data string data string
} }
func decodeSize(data string, index int) (int, int) { func decodeSize(data string, index int) (int, int) {
// Fast-path for common case of a single byte, value 0..127.
b := data[index] b := data[index]
index++ index++
if b < 0x80 { if b == 255 {
return int(b), index // Larger numbers are encoded as 3 bytes little-endian.
}
size := int(b & 0x7F)
for shift := uint(7); ; shift += 7 {
// Just panic if we go off the end of data, since all Labels strings are constructed internally and // Just panic if we go off the end of data, since all Labels strings are constructed internally and
// malformed data indicates a bug, or memory corruption. // malformed data indicates a bug, or memory corruption.
b := data[index] return int(data[index]) + (int(data[index+1]) << 8) + (int(data[index+2]) << 16), index + 3
index++
size |= int(b&0x7F) << shift
if b < 0x80 {
break
}
} }
return size, index // More common case of a single byte, value 0..254.
return int(b), index
} }
func decodeString(data string, index int) (string, int) { func decodeString(data string, index int) (string, int) {
@ -57,8 +51,8 @@ func decodeString(data string, index int) (string, int) {
return data[index : index+size], index + size return data[index : index+size], index + size
} }
// Bytes returns ls as a byte slice. // Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
// It uses non-printing characters and so should not be used for printing. // Encoding may change over time or between runs of Prometheus.
func (ls Labels) Bytes(buf []byte) []byte { func (ls Labels) Bytes(buf []byte) []byte {
if cap(buf) < len(ls.data) { if cap(buf) < len(ls.data) {
buf = make([]byte, len(ls.data)) buf = make([]byte, len(ls.data))
@ -527,48 +521,27 @@ func marshalLabelToSizedBuffer(m *Label, data []byte) int {
return len(data) - i return len(data) - i
} }
// sizeWhenEncoded returns the number of bytes needed to store the length
// prefix for a string of x bytes: 1 byte for lengths 0-254, or 4 bytes
// (a 255 flag byte followed by the length in 3 bytes little-endian) for
// lengths 255 to 2^24-1.
//
// It panics if x does not fit in 3 bytes, i.e. x >= 2^24. A length of exactly
// 2^24 must be rejected too: its low 24 bits are all zero, so it would be
// stored, and later decoded, as a length of 0.
func sizeWhenEncoded(x uint64) (n int) {
	switch {
	case x < 255:
		return 1
	case x < 1<<24:
		return 4
	}
	panic("String too long to encode as label.")
}
func encodeVarint(data []byte, offset int, v uint64) int {
offset -= sizeVarint(v)
base := offset
for v >= 1<<7 {
data[offset] = uint8(v&0x7f | 0x80)
v >>= 7
offset++
}
data[offset] = uint8(v)
return base
}
// Special code for the common case that a size is less than 128
// encodeSize writes the length prefix for a string of v bytes into data,
// placing it immediately before offset, and returns the index at which the
// prefix starts.
//
// Lengths 0-254 take a single byte. Larger lengths take four bytes: the flag
// value 255 followed by the length in 3 bytes little-endian.
//
// It panics if v is 2^24 or more, because such a length cannot be represented
// in 3 bytes and would otherwise be truncated silently into a wrong length.
func encodeSize(data []byte, offset, v int) int {
	// Common case: the length fits in one byte.
	if v < 255 {
		offset--
		data[offset] = uint8(v)
		return offset
	}
	// This check sits on the uncommon path only, so the fast path above
	// keeps its single branch.
	if v >= 1<<24 {
		panic("String too long to encode as label.")
	}
	offset -= 4
	data[offset] = 255
	data[offset+1] = byte(v)
	data[offset+2] = byte(v >> 8)
	data[offset+3] = byte(v >> 16)
	return offset
}
func labelsSize(lbls []Label) (n int) { func labelsSize(lbls []Label) (n int) {
@ -582,9 +555,9 @@ func labelsSize(lbls []Label) (n int) {
func labelSize(m *Label) (n int) { func labelSize(m *Label) (n int) {
// strings are encoded as length followed by contents. // strings are encoded as length followed by contents.
l := len(m.Name) l := len(m.Name)
n += l + sizeVarint(uint64(l)) n += l + sizeWhenEncoded(uint64(l))
l = len(m.Value) l = len(m.Value)
n += l + sizeVarint(uint64(l)) n += l + sizeWhenEncoded(uint64(l))
return n return n
} }

View File

@ -27,6 +27,8 @@ import (
) )
func TestLabels_String(t *testing.T) { func TestLabels_String(t *testing.T) {
s254 := strings.Repeat("x", 254) // Edge cases for stringlabels encoding.
s255 := strings.Repeat("x", 255)
cases := []struct { cases := []struct {
labels Labels labels Labels
expected string expected string
@ -43,6 +45,14 @@ func TestLabels_String(t *testing.T) {
labels: FromStrings("service.name", "t1", "whatever\\whatever", "t2"), labels: FromStrings("service.name", "t1", "whatever\\whatever", "t2"),
expected: `{"service.name"="t1", "whatever\\whatever"="t2"}`, expected: `{"service.name"="t1", "whatever\\whatever"="t2"}`,
}, },
{
labels: FromStrings("aaa", "111", "xx", s254),
expected: `{aaa="111", xx="` + s254 + `"}`,
},
{
labels: FromStrings("aaa", "111", "xx", s255),
expected: `{aaa="111", xx="` + s255 + `"}`,
},
} }
for _, c := range cases { for _, c := range cases {
str := c.labels.String() str := c.labels.String()