From 1263a68875c82bd1bc9a2813671480f44ebdf7ca Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Wed, 12 Jul 2017 18:16:12 +0200 Subject: [PATCH 1/2] Recompact blocks with large number of tombstones Signed-off-by: Goutham Veeramachaneni --- compact.go | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/compact.go b/compact.go index 6158f2d67a..43afa45fac 100644 --- a/compact.go +++ b/compact.go @@ -14,6 +14,7 @@ package tsdb import ( + "encoding/binary" "fmt" "math/rand" "os" @@ -160,7 +161,24 @@ func (c *compactor) Plan() ([][]string, error) { return [][]string{res} } - return sliceDirs(c.selectDirs(dms)), nil + planDirs := sliceDirs(c.selectDirs(dms)) + if len(dirs) > 1 { + return planDirs, nil + } + + // Compact any blocks that have >5% tombstones. + for i := len(dms) - 1; i >= 0; i-- { + meta := dms[i].meta + if meta.MaxTime-meta.MinTime < c.opts.blockRanges[len(c.opts.blockRanges)/2] { + break + } + + if meta.Stats.NumTombstones > 0 && meta.Stats.NumSeries/meta.Stats.NumTombstones <= 20 { // 5%; the zero check avoids a divide-by-zero panic on blocks without tombstones + return [][]string{{dms[i].dir}}, nil + } + } + + return nil, nil } func (c *compactor) selectDirs(ds []dirMeta) []dirMeta { @@ -238,8 +256,6 @@ func compactBlockMetas(blocks ...BlockMeta) (res BlockMeta) { sources := map[ulid.ULID]struct{}{} for _, b := range blocks { - res.Stats.NumSamples += b.Stats.NumSamples - if b.Compaction.Generation > res.Compaction.Generation { res.Compaction.Generation = b.Compaction.Generation } @@ -410,6 +426,11 @@ func populateBlock(blocks []Block, indexw IndexWriter, chunkw ChunkWriter) (*Blo for set.Next() { lset, chks, dranges := set.At() // The chunks here are not fully deleted. + // Skip the series with all deleted chunks. + if len(chks) == 0 { + continue + } + if len(dranges) > 0 { // Re-encode the chunk to not have deleted values. 
for _, chk := range chks { @@ -439,6 +460,9 @@ func populateBlock(blocks []Block, indexw IndexWriter, chunkw ChunkWriter) (*Blo meta.Stats.NumChunks += uint64(len(chks)) meta.Stats.NumSeries++ + for _, chk := range chks { + meta.Stats.NumSamples += uint64(binary.BigEndian.Uint16(chk.Chunk.Bytes())) + } for _, l := range lset { valset, ok := values[l.Name] From 401bdfd4a65d6aad901910396cf846bf7421c087 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Wed, 12 Jul 2017 18:31:26 +0200 Subject: [PATCH 2/2] Make Chunks return the number of samples inside Signed-off-by: Goutham Veeramachaneni --- chunks/chunk.go | 1 + chunks/xor.go | 5 +++++ compact.go | 3 +-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/chunks/chunk.go b/chunks/chunk.go index 86f456be87..4eeb9c5d79 100644 --- a/chunks/chunk.go +++ b/chunks/chunk.go @@ -43,6 +43,7 @@ type Chunk interface { Encoding() Encoding Appender() (Appender, error) Iterator() Iterator + NumSamples() int } // FromData returns a chunk from a byte slice of chunk data. diff --git a/chunks/xor.go b/chunks/xor.go index a72e9ef0cb..e9bdef0741 100644 --- a/chunks/xor.go +++ b/chunks/xor.go @@ -72,6 +72,11 @@ func (c *XORChunk) Bytes() []byte { return c.b.bytes() } +// NumSamples returns the number of samples in the chunk. +func (c *XORChunk) NumSamples() int { + return int(binary.BigEndian.Uint16(c.Bytes())) +} + // Appender implements the Chunk interface. 
func (c *XORChunk) Appender() (Appender, error) { it := c.iterator() diff --git a/compact.go b/compact.go index 43afa45fac..1036cf4031 100644 --- a/compact.go +++ b/compact.go @@ -14,7 +14,6 @@ package tsdb import ( - "encoding/binary" "fmt" "math/rand" "os" @@ -461,7 +460,7 @@ func populateBlock(blocks []Block, indexw IndexWriter, chunkw ChunkWriter) (*Blo meta.Stats.NumChunks += uint64(len(chks)) meta.Stats.NumSeries++ for _, chk := range chks { - meta.Stats.NumSamples += uint64(binary.BigEndian.Uint16(chk.Chunk.Bytes())) + meta.Stats.NumSamples += uint64(chk.Chunk.NumSamples()) } for _, l := range lset {