mirror of
https://github.com/prometheus/prometheus.git
synced 2025-08-06 14:17:12 +02:00
Labels: simpler/faster stringlabels encoding (#16069)
Instead of using varint to encode the size of each label, use a single byte for sizes 0-254, or a flag value of 255 followed by the size in 3 bytes little-endian. This reduces the amount of code, and also the number of branches in commonly-executed code, so it runs faster.

The maximum allowed label name or value length is now 2^24 or 16MB.

Memory used by labels changes as follows:
* Labels from 0 to 127 bytes length: same
* From 128 to 254: 1 byte less
* From 255 to 16383: 2 bytes more
* From 16384 to 2MB: 1 byte more
* From 2MB to 16MB: same

Labels: panic on string too long. Slightly more user-friendly than encoding bad data and finding out when we decode.

Clarify that Labels.Bytes() encoding can change

---------

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
parent
7789ef27c8
commit
b2c2146d7c
@ -32,8 +32,8 @@ func (ls Labels) Len() int { return len(ls) }
|
|||||||
func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] }
|
func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] }
|
||||||
func (ls Labels) Less(i, j int) bool { return ls[i].Name < ls[j].Name }
|
func (ls Labels) Less(i, j int) bool { return ls[i].Name < ls[j].Name }
|
||||||
|
|
||||||
// Bytes returns ls as a byte slice.
|
// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
|
||||||
// It uses an invalid byte character as a separator and so should not be used for printing.
|
// Encoding may change over time or between runs of Prometheus.
|
||||||
func (ls Labels) Bytes(buf []byte) []byte {
|
func (ls Labels) Bytes(buf []byte) []byte {
|
||||||
b := bytes.NewBuffer(buf[:0])
|
b := bytes.NewBuffer(buf[:0])
|
||||||
b.WriteByte(labelSep)
|
b.WriteByte(labelSep)
|
||||||
|
@ -140,8 +140,8 @@ func decodeString(t *nameTable, data string, index int) (string, int) {
|
|||||||
return t.ToName(num), index
|
return t.ToName(num), index
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bytes returns ls as a byte slice.
|
// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
|
||||||
// It uses non-printing characters and so should not be used for printing.
|
// Encoding may change over time or between runs of Prometheus.
|
||||||
func (ls Labels) Bytes(buf []byte) []byte {
|
func (ls Labels) Bytes(buf []byte) []byte {
|
||||||
b := bytes.NewBuffer(buf[:0])
|
b := bytes.NewBuffer(buf[:0])
|
||||||
for i := 0; i < len(ls.data); {
|
for i := 0; i < len(ls.data); {
|
||||||
|
@ -24,31 +24,25 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Labels is implemented by a single flat string holding name/value pairs.
|
// Labels is implemented by a single flat string holding name/value pairs.
|
||||||
// Each name and value is preceded by its length in varint encoding.
|
// Each name and value is preceded by its length, encoded as a single byte
|
||||||
|
// for size 0-254, or the following 3 bytes little-endian, if the first byte is 255.
|
||||||
|
// Maximum length allowed is 2^24 or 16MB.
|
||||||
// Names are in order.
|
// Names are in order.
|
||||||
type Labels struct {
|
type Labels struct {
|
||||||
data string
|
data string
|
||||||
}
|
}
|
||||||
|
|
||||||
func decodeSize(data string, index int) (int, int) {
|
func decodeSize(data string, index int) (int, int) {
|
||||||
// Fast-path for common case of a single byte, value 0..127.
|
|
||||||
b := data[index]
|
b := data[index]
|
||||||
index++
|
index++
|
||||||
if b < 0x80 {
|
if b == 255 {
|
||||||
return int(b), index
|
// Larger numbers are encoded as 3 bytes little-endian.
|
||||||
}
|
|
||||||
size := int(b & 0x7F)
|
|
||||||
for shift := uint(7); ; shift += 7 {
|
|
||||||
// Just panic if we go off the end of data, since all Labels strings are constructed internally and
|
// Just panic if we go off the end of data, since all Labels strings are constructed internally and
|
||||||
// malformed data indicates a bug, or memory corruption.
|
// malformed data indicates a bug, or memory corruption.
|
||||||
b := data[index]
|
return int(data[index]) + (int(data[index+1]) << 8) + (int(data[index+2]) << 16), index + 3
|
||||||
index++
|
|
||||||
size |= int(b&0x7F) << shift
|
|
||||||
if b < 0x80 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return size, index
|
// More common case of a single byte, value 0..254.
|
||||||
|
return int(b), index
|
||||||
}
|
}
|
||||||
|
|
||||||
func decodeString(data string, index int) (string, int) {
|
func decodeString(data string, index int) (string, int) {
|
||||||
@ -57,8 +51,8 @@ func decodeString(data string, index int) (string, int) {
|
|||||||
return data[index : index+size], index + size
|
return data[index : index+size], index + size
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bytes returns ls as a byte slice.
|
// Bytes returns an opaque, not-human-readable, encoding of ls, usable as a map key.
|
||||||
// It uses non-printing characters and so should not be used for printing.
|
// Encoding may change over time or between runs of Prometheus.
|
||||||
func (ls Labels) Bytes(buf []byte) []byte {
|
func (ls Labels) Bytes(buf []byte) []byte {
|
||||||
if cap(buf) < len(ls.data) {
|
if cap(buf) < len(ls.data) {
|
||||||
buf = make([]byte, len(ls.data))
|
buf = make([]byte, len(ls.data))
|
||||||
@ -527,48 +521,27 @@ func marshalLabelToSizedBuffer(m *Label, data []byte) int {
|
|||||||
return len(data) - i
|
return len(data) - i
|
||||||
}
|
}
|
||||||
|
|
||||||
func sizeVarint(x uint64) (n int) {
|
func sizeWhenEncoded(x uint64) (n int) {
|
||||||
// Most common case first
|
if x < 255 {
|
||||||
if x < 1<<7 {
|
|
||||||
return 1
|
return 1
|
||||||
|
} else if x <= 1<<24 {
|
||||||
|
return 4
|
||||||
}
|
}
|
||||||
if x >= 1<<56 {
|
panic("String too long to encode as label.")
|
||||||
return 9
|
|
||||||
}
|
|
||||||
if x >= 1<<28 {
|
|
||||||
x >>= 28
|
|
||||||
n = 4
|
|
||||||
}
|
|
||||||
if x >= 1<<14 {
|
|
||||||
x >>= 14
|
|
||||||
n += 2
|
|
||||||
}
|
|
||||||
if x >= 1<<7 {
|
|
||||||
n++
|
|
||||||
}
|
|
||||||
return n + 1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func encodeVarint(data []byte, offset int, v uint64) int {
|
|
||||||
offset -= sizeVarint(v)
|
|
||||||
base := offset
|
|
||||||
for v >= 1<<7 {
|
|
||||||
data[offset] = uint8(v&0x7f | 0x80)
|
|
||||||
v >>= 7
|
|
||||||
offset++
|
|
||||||
}
|
|
||||||
data[offset] = uint8(v)
|
|
||||||
return base
|
|
||||||
}
|
|
||||||
|
|
||||||
// Special code for the common case that a size is less than 128
|
|
||||||
func encodeSize(data []byte, offset, v int) int {
|
func encodeSize(data []byte, offset, v int) int {
|
||||||
if v < 1<<7 {
|
if v < 255 {
|
||||||
offset--
|
offset--
|
||||||
data[offset] = uint8(v)
|
data[offset] = uint8(v)
|
||||||
return offset
|
return offset
|
||||||
}
|
}
|
||||||
return encodeVarint(data, offset, uint64(v))
|
offset -= 4
|
||||||
|
data[offset] = 255
|
||||||
|
data[offset+1] = byte(v)
|
||||||
|
data[offset+2] = byte((v >> 8))
|
||||||
|
data[offset+3] = byte((v >> 16))
|
||||||
|
return offset
|
||||||
}
|
}
|
||||||
|
|
||||||
func labelsSize(lbls []Label) (n int) {
|
func labelsSize(lbls []Label) (n int) {
|
||||||
@ -582,9 +555,9 @@ func labelsSize(lbls []Label) (n int) {
|
|||||||
func labelSize(m *Label) (n int) {
|
func labelSize(m *Label) (n int) {
|
||||||
// strings are encoded as length followed by contents.
|
// strings are encoded as length followed by contents.
|
||||||
l := len(m.Name)
|
l := len(m.Name)
|
||||||
n += l + sizeVarint(uint64(l))
|
n += l + sizeWhenEncoded(uint64(l))
|
||||||
l = len(m.Value)
|
l = len(m.Value)
|
||||||
n += l + sizeVarint(uint64(l))
|
n += l + sizeWhenEncoded(uint64(l))
|
||||||
return n
|
return n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,6 +27,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestLabels_String(t *testing.T) {
|
func TestLabels_String(t *testing.T) {
|
||||||
|
s254 := strings.Repeat("x", 254) // Edge cases for stringlabels encoding.
|
||||||
|
s255 := strings.Repeat("x", 255)
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
labels Labels
|
labels Labels
|
||||||
expected string
|
expected string
|
||||||
@ -43,6 +45,14 @@ func TestLabels_String(t *testing.T) {
|
|||||||
labels: FromStrings("service.name", "t1", "whatever\\whatever", "t2"),
|
labels: FromStrings("service.name", "t1", "whatever\\whatever", "t2"),
|
||||||
expected: `{"service.name"="t1", "whatever\\whatever"="t2"}`,
|
expected: `{"service.name"="t1", "whatever\\whatever"="t2"}`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
labels: FromStrings("aaa", "111", "xx", s254),
|
||||||
|
expected: `{aaa="111", xx="` + s254 + `"}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
labels: FromStrings("aaa", "111", "xx", s255),
|
||||||
|
expected: `{aaa="111", xx="` + s255 + `"}`,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
str := c.labels.String()
|
str := c.labels.String()
|
||||||
|
Loading…
Reference in New Issue
Block a user