From d5bfbe3114561a3329fc1bdda1d83e0133827220 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 25 Oct 2021 15:14:15 +0200 Subject: [PATCH] improve bstream comments and doc (#9560) * improve bstream comments and doc Signed-off-by: Dieter Plaetinck * feedback Signed-off-by: Dieter Plaetinck --- tsdb/README.md | 2 +- tsdb/chunkenc/bstream.go | 13 ++++++--- tsdb/chunkenc/xor.go | 8 ++++-- tsdb/docs/bstream.md | 62 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 tsdb/docs/bstream.md diff --git a/tsdb/README.md b/tsdb/README.md index 59f800c7ae..4ad8fb0f33 100644 --- a/tsdb/README.md +++ b/tsdb/README.md @@ -10,7 +10,7 @@ Based on the Gorilla TSDB [white papers](http://www.vldb.org/pvldb/vol8/p1816-te Video: [Storing 16 Bytes at Scale](https://youtu.be/b_pEevMAC3I) from [PromCon 2017](https://promcon.io/2017-munich/). -See also the [format documentation](docs/format/README.md). +See also the [format documentation](docs/format/README.md) and [bstream details](docs/bstream.md). A series of blog posts explaining different components of TSDB: * [The Head Block](https://ganeshvernekar.com/blog/prometheus-tsdb-the-head-block/) diff --git a/tsdb/chunkenc/bstream.go b/tsdb/chunkenc/bstream.go index c8efeab1d7..833c9794b6 100644 --- a/tsdb/chunkenc/bstream.go +++ b/tsdb/chunkenc/bstream.go @@ -48,8 +48,8 @@ import ( // bstream is a stream of bits. type bstream struct { - stream []byte // the data stream - count uint8 // how many bits are valid in current byte + stream []byte // The data stream. + count uint8 // How many right-most bits are available for writing in the current byte (the last byte of the stream). } func (b *bstream) bytes() []byte { @@ -86,14 +86,17 @@ func (b *bstream) writeByte(byt byte) { i := len(b.stream) - 1 - // fill up b.b with b.count bits from byt + // Complete the last byte with the leftmost b.count bits from byt. 
b.stream[i] |= byt >> (8 - b.count) b.stream = append(b.stream, 0) i++ + // Write the remainder, if any. b.stream[i] = byt << b.count } +// writeBits writes the nbits right-most bits of u to the stream +// in left-to-right order. func (b *bstream) writeBits(u uint64, nbits int) { u <<= 64 - uint(nbits) for nbits >= 8 { @@ -115,7 +118,7 @@ type bstreamReader struct { streamOffset int // The offset from which read the next byte from the stream. buffer uint64 // The current buffer, filled from the stream, containing up to 8 bytes from which read bits. - valid uint8 // The number of bits valid to read (from left) in the current buffer. + valid uint8 // The number of right-most bits valid to read (from left) in the current 8 byte buffer. } func newBReader(b []byte) bstreamReader { @@ -148,6 +151,8 @@ func (b *bstreamReader) readBitFast() (bit, error) { return (b.buffer & bitmask) != 0, nil } +// readBits constructs a uint64 with the nbits right-most bits +// read from the stream, and any other bits 0. func (b *bstreamReader) readBits(nbits uint8) (uint64, error) { if b.valid == 0 { if !b.loadNextBuffer(nbits) { diff --git a/tsdb/chunkenc/xor.go b/tsdb/chunkenc/xor.go index 4eabd5a953..ba00a6e811 100644 --- a/tsdb/chunkenc/xor.go +++ b/tsdb/chunkenc/xor.go @@ -200,6 +200,8 @@ func (a *xorAppender) Append(t int64, v float64) { a.tDelta = tDelta } +// bitRange returns whether the given integer can be represented by nbits. +// See docs/bstream.md. func bitRange(x int64, nbits uint8) bool { return -((1<<(nbits-1))-1) <= x && x <= 1<<(nbits-1) } @@ -372,9 +374,11 @@ func (it *xorIterator) Next() bool { it.err = err return false } + + // Account for negative numbers, which come back as high unsigned numbers. + // See docs/bstream.md. 
if bits > (1 << (sz - 1)) { - // or something - bits = bits - (1 << sz) + bits -= 1 << sz } dod = int64(bits) } diff --git a/tsdb/docs/bstream.md b/tsdb/docs/bstream.md new file mode 100644 index 0000000000..91dec1b148 --- /dev/null +++ b/tsdb/docs/bstream.md @@ -0,0 +1,62 @@ +# bstream details + +This doc describes details of the bstream (bitstream) and how we use it for encoding and decoding. +This doc is incomplete. For more background, see the Gorilla TSDB [white paper](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) +or the original [go-tsz](https://github.com/dgryski/go-tsz) implementation, which this code is based on. + +## Delta-of-delta encoding for timestamps + +We need to be able to encode and decode dod's for timestamps, which can be positive, zero, or negative. +Note that int64's are implemented as [2's complement](https://en.wikipedia.org/wiki/Two%27s_complement) + +and look like: + +``` +0111...111 = maxint64 + ... +0000...111 = 7 +0000...110 = 6 +0000...101 = 5 +0000...100 = 4 +0000...011 = 3 +0000...010 = 2 +0000...001 = 1 +0000...000 = 0 +1111...111 = -1 +1111...110 = -2 +1111...101 = -3 +1111...100 = -4 +1111...011 = -5 +1111...010 = -6 +1111...001 = -7 +1111...000 = -8 + ... +1000...001 = minint64+1 +1000...000 = minint64 +``` + +All numbers have a prefix (of zeroes for positive numbers, of ones for negative numbers), followed by a number of significant digits at the end. +In all cases, the smaller the absolute value of the number, the fewer the amount of significant digits. + +To encode these numbers, we use: +* A prefix which declares the amount of bits that follow (we use a predefined list of options in order of increasing number of significant bits). +* A number of bits which is one more than the number of significant bits. The extra bit is needed because we deal with unsigned integers, although + it isn't exactly a sign bit. (See below for details). 
+ +The `bitRange` function determines whether a given integer can be represented by a number of bits. +For a given number of bits `nbits` we can distinguish (and thus encode) any set of `2^nbits` numbers. +E.g. for `nbits = 3`, we can encode 8 distinct numbers, and we are free to choose our boundaries. For example -4 to 3, +-3 to 4, 0 to 7 or even -2 to 5 (always inclusive). (Observe in the list above that this is always true.) +Because we need to support positive and negative numbers equally, we choose boundaries that grow symmetrically. Following the same example, +we choose -3 to 4. + +When decoding the number, the most interesting part is how to recognize whether a number is negative or positive, and thus which prefix to set. +Note that the bstream library doesn't interpret integers as a specific type, but rather returns them as uint64's (which are really just a container for 64 bits). +Within the ranges we choose, if looked at as unsigned integers, the higher portion of the range represents the negative numbers. +Continuing the same example, the numbers 001, 010, 011 and 100 are returned as unsigned integers 1,2,3,4 and mean the same thing when cast to int64's. +But the others, 101, 110 and 111 are returned as unsigned integers 5,6,7 but actually represent -3, -2 and -1 (see list above). +The cutoff value is the value set by the `nbits`'th bit, and the value to subtract is the one set by the `nbits+1`'th bit. +In our example, the 3rd bit sets the number 4, and the 4th sets the number 8. So if we see an unsigned integer exceeding 4 (5,6,7) we subtract 8. This gives us our desired values (-3, -2 and -1). + +Careful observers may note that, if we shift our boundaries down by one, the first bit would always indicate the sign (and imply the needed prefix). +In our example of `nbits = 3`, that would mean the range from -4 to 3. But what we have now works just fine too.