From a282d2509971174301408c5a5f96946c478dfb0f Mon Sep 17 00:00:00 2001 From: Xiaochao Dong Date: Thu, 10 Sep 2020 23:05:47 +0800 Subject: [PATCH] tsdb: remove duplicate values set to reduce memory usage(map overhead) (#7915) Signed-off-by: Xiaochao Dong (@damnever) --- tsdb/head.go | 68 +++++++----------------------------------- tsdb/head_test.go | 16 ++++++++-- tsdb/index/postings.go | 30 +++++++++++++++++++ tsdb/querier_test.go | 6 ++-- 4 files changed, 56 insertions(+), 64 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index 82b96de102..9366bfe149 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -20,7 +20,6 @@ import ( "path/filepath" "runtime" "sort" - "strings" "sync" "time" @@ -73,7 +72,6 @@ type Head struct { symMtx sync.RWMutex symbols map[string]struct{} - values map[string]stringset // Label names to possible values. deletedMtx sync.Mutex deleted map[uint64]int // Deleted series, and what WAL segment they must be kept until. @@ -303,7 +301,6 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int wal: wal, logger: l, series: newStripeSeries(stripeSize, seriesCallback), - values: map[string]stringset{}, symbols: map[string]struct{}{}, postings: index.NewUnorderedMemPostings(), tombstones: tombstones.NewMemTombstones(), @@ -1345,24 +1342,15 @@ func (h *Head) gc() { defer h.symMtx.Unlock() symbols := make(map[string]struct{}, len(h.symbols)) - values := make(map[string]stringset, len(h.values)) - if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error { - symbols[t.Name] = struct{}{} - symbols[t.Value] = struct{}{} - - ss, ok := values[t.Name] - if !ok { - ss = stringset{} - values[t.Name] = ss - } - ss.set(t.Value) + if err := h.postings.Iter(func(l labels.Label, _ index.Postings) error { + symbols[l.Name] = struct{}{} + symbols[l.Value] = struct{}{} return nil }); err != nil { // This should never happen, as the iteration function only returns nil. panic(err) } h.symbols = symbols - h.values = values } // Tombstones returns a new reader over the head's tombstones @@ -1572,37 +1560,27 @@ func (h *headIndexReader) SortedLabelValues(name string) ([]string, error) { // specific label name that are within the time range mint to maxt. func (h *headIndexReader) LabelValues(name string) ([]string, error) { h.head.symMtx.RLock() - + defer h.head.symMtx.RUnlock() if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() { - h.head.symMtx.RUnlock() return []string{}, nil } - sl := make([]string, 0, len(h.head.values[name])) - for s := range h.head.values[name] { - sl = append(sl, s) - } - h.head.symMtx.RUnlock() - return sl, nil + values := h.head.postings.LabelValues(name) + return values, nil } // LabelNames returns all the unique label names present in the head // that are within the time range mint to maxt. func (h *headIndexReader) LabelNames() ([]string, error) { h.head.symMtx.RLock() - defer h.head.symMtx.RUnlock() - if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() { + h.head.symMtx.RUnlock() return []string{}, nil } - labelNames := make([]string, 0, len(h.head.values)) - for name := range h.head.values { - if name == "" { - continue - } - labelNames = append(labelNames, name) - } + labelNames := h.head.postings.LabelNames() + h.head.symMtx.RUnlock() + sort.Strings(labelNames) return labelNames, nil } @@ -1714,13 +1692,6 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie defer h.symMtx.Unlock() for _, l := range lset { - valset, ok := h.values[l.Name] - if !ok { - valset = stringset{} - h.values[l.Name] = valset - } - valset.set(l.Value) - h.symbols[l.Name] = struct{}{} h.symbols[l.Value] = struct{}{} } @@ -2335,25 +2306,6 @@ func (it *memSafeIterator) At() (int64, float64) { return s.t, s.v } -type stringset map[string]struct{} - -func (ss stringset) set(s string) { - ss[s] = struct{}{} -} - -func (ss stringset) String() string { - return strings.Join(ss.slice(), ",") -} - -func (ss stringset) slice() []string { - slice := make([]string, 0, len(ss)) - for k := range ss { - slice = append(slice, k) - } - sort.Strings(slice) - return slice -} - type mmappedChunk struct { ref uint64 numSamples uint16 diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 3cea3b3e65..1803f267fe 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -387,11 +387,21 @@ func TestHead_Truncate(t *testing.T) { "2": {}, }, h.symbols) - testutil.Equals(t, map[string]stringset{ + values := map[string]map[string]struct{}{} + for _, name := range h.postings.LabelNames() { + ss, ok := values[name] + if !ok { + ss = map[string]struct{}{} + values[name] = ss + } + for _, value := range h.postings.LabelValues(name) { + ss[value] = struct{}{} + } + } + testutil.Equals(t, map[string]map[string]struct{}{ "a": {"1": struct{}{}, "2": struct{}{}}, "b": {"1": struct{}{}}, - "": {"": struct{}{}}, - }, h.values) + }, values) } // Validate various behaviors brought on by firstChunkID accounting for diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index c2f0429757..055f74118e 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -79,6 +79,36 @@ func (p *MemPostings) SortedKeys() []labels.Label { return keys } +// LabelNames returns all the unique label names. +func (p *MemPostings) LabelNames() []string { + p.mtx.RLock() + defer p.mtx.RUnlock() + n := len(p.m) + if n == 0 { + return nil + } + + names := make([]string, 0, n-1) + for name := range p.m { + if name != allPostingsKey.Name { + names = append(names, name) + } + } + return names +} + +// LabelValues returns label values for the given name. +func (p *MemPostings) LabelValues(name string) []string { + p.mtx.RLock() + defer p.mtx.RUnlock() + + values := make([]string, 0, len(p.m[name])) + for v := range p.m[name] { + values = append(values, v) + } + return values +} + // PostingsStats contains cardinality based statistics for postings. type PostingsStats struct { CardinalityMetricsStats []Stat diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 281ffaefed..a028b6b8e1 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -106,7 +106,7 @@ func createIdxChkReaders(t *testing.T, tc []seriesSamples) (IndexReader, ChunkRe postings := index.NewMemPostings() chkReader := mockChunkReader(make(map[uint64]chunkenc.Chunk)) - lblIdx := make(map[string]stringset) + lblIdx := make(map[string]map[string]struct{}) mi := newMockIndex() blockMint := int64(math.MaxInt64) blockMaxt := int64(math.MinInt64) @@ -145,10 +145,10 @@ func createIdxChkReaders(t *testing.T, tc []seriesSamples) (IndexReader, ChunkRe for _, l := range ls { vs, present := lblIdx[l.Name] if !present { - vs = stringset{} + vs = map[string]struct{}{} lblIdx[l.Name] = vs } - vs.set(l.Value) + vs[l.Value] = struct{}{} } }