From 665955da482e81eac56b07aef9d345d8c7597e6d Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Wed, 11 Oct 2017 09:33:35 +0200 Subject: [PATCH 1/3] Clarify postings index semantics, handle staleness The postings list index may point to series that no longer exist during garbage collection. This clarifies that this is valid behavior. It would be possible, though more complex, to always keep them in sync. However, series existance means nothing in itself as the queried time range defines whether there's actual data. Thus our definition is sane overall as long as drift is kept small. --- head.go | 9 +-------- index.go | 3 +++ querier.go | 4 ++++ querier_test.go | 10 ++++------ 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/head.go b/head.go index 8f5beb4fb6..1d0f688395 100644 --- a/head.go +++ b/head.go @@ -833,24 +833,17 @@ func (h *headIndexReader) SortedPostings(p Postings) Postings { if err := p.Err(); err != nil { return errPostings{err: errors.Wrap(err, "expand postings")} } - var err error sort.Slice(ep, func(i, j int) bool { - if err != nil { - return false - } a := h.head.series.getByID(ep[i]) b := h.head.series.getByID(ep[j]) if a == nil || b == nil { - err = errors.Errorf("series not found") + level.Debug(h.head.logger).Log("msg", "looked up series not found") return false } return labels.Compare(a.lset, b.lset) < 0 }) - if err != nil { - return errPostings{err: err} - } return newListPostings(ep) } diff --git a/index.go b/index.go index 89f7764331..c0680aa338 100644 --- a/index.go +++ b/index.go @@ -525,6 +525,8 @@ type IndexReader interface { // Postings returns the postings list iterator for the label pair. // The Postings here contain the offsets to the series inside the index. + // Found IDs are not strictly required to point to a valid Series, e.g. during + // background garbage collections. Postings(name, value string) (Postings, error) // SortedPostings returns a postings list that is reordered to be sorted @@ -533,6 +535,7 @@ type IndexReader interface { // Series populates the given labels and chunk metas for the series identified // by the reference. + // Returns ErrNotFound if the ref does not resolve to a known series. Series(ref uint64, lset *labels.Labels, chks *[]ChunkMeta) error // LabelIndices returns the label pairs for which indices exist. diff --git a/querier.go b/querier.go index 808a1057aa..12e05e79df 100644 --- a/querier.go +++ b/querier.go @@ -450,6 +450,10 @@ Outer: for s.p.Next() { ref := s.p.At() if err := s.index.Series(ref, &lset, &chunks); err != nil { + // Postings may be stale. Skip if no underlying series exists. + if errors.Cause(err) == ErrNotFound { + continue + } s.err = err return false } diff --git a/querier_test.go b/querier_test.go index e46f3b9475..b6f95bf7ce 100644 --- a/querier_test.go +++ b/querier_test.go @@ -702,9 +702,8 @@ func TestBaseChunkSeries(t *testing.T) { ref: 108, }, }, - postings: []uint64{12, 10, 108}, - - expIdxs: []int{0, 1, 3}, + postings: []uint64{12, 13, 10, 108}, // 13 doesn't exist and should just be skipped over. + expIdxs: []int{0, 1, 3}, }, { series: []refdSeries{ @@ -718,12 +717,11 @@ func TestBaseChunkSeries(t *testing.T) { { lset: labels.New([]labels.Label{{"b", "c"}}...), chunks: []ChunkMeta{{Ref: 8282}}, - ref: 1, + ref: 3, }, }, postings: []uint64{}, - - expIdxs: []int{}, + expIdxs: []int{}, }, } From 91a154d2286fb52492a99a92c52493d31ea81129 Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Wed, 11 Oct 2017 11:02:57 +0200 Subject: [PATCH 2/3] Fix block printing in cmd/main --- cmd/tsdb/main.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cmd/tsdb/main.go b/cmd/tsdb/main.go index fe84340c05..146aac08ac 100644 --- a/cmd/tsdb/main.go +++ b/cmd/tsdb/main.go @@ -332,20 +332,22 @@ func exitWithError(err error) { os.Exit(1) } -func printBlocks(blocks []tsdb.DiskBlock) { +func printBlocks(blocks []*tsdb.Block) { tw := tsdb.GetNewTabWriter(os.Stdout) defer tw.Flush() fmt.Fprintln(tw, "BLOCK ULID\tMIN TIME\tMAX TIME\tNUM SAMPLES\tNUM CHUNKS\tNUM SERIES") for _, b := range blocks { + meta := b.Meta() + fmt.Fprintf(tw, "%v\t%v\t%v\t%v\t%v\t%v\n", - b.Meta().ULID, - b.Meta().MinTime, - b.Meta().MaxTime, - b.Meta().Stats.NumSamples, - b.Meta().Stats.NumChunks, - b.Meta().Stats.NumSeries, + meta.ULID, + meta.MinTime, + meta.MaxTime, + meta.Stats.NumSamples, + meta.Stats.NumChunks, + meta.Stats.NumSeries, ) } } From 065f42f58c317c19f7d3b470a0cb46270276e806 Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Thu, 12 Oct 2017 15:25:12 +0200 Subject: [PATCH 3/3] head: track number of series not found errors in metric --- head.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/head.go b/head.go index 1d0f688395..e0fd3c076d 100644 --- a/head.go +++ b/head.go @@ -74,6 +74,7 @@ type headMetrics struct { series prometheus.Gauge seriesCreated prometheus.Counter seriesRemoved prometheus.Counter + seriesNotFound prometheus.Counter chunks prometheus.Gauge chunksCreated prometheus.Gauge chunksRemoved prometheus.Gauge @@ -103,6 +104,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { Name: "tsdb_head_series_removed_total", Help: "Total number of series removed in the head", }) + m.seriesNotFound = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "tsdb_head_series_not_found", + Help: "Total number of requests for series that were not found.", + }) m.chunks = prometheus.NewGauge(prometheus.GaugeOpts{ Name: "tsdb_head_chunks", Help: "Total number of chunks in the head block.", @@ -149,6 +154,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { m.series, m.seriesCreated, m.seriesRemoved, + m.seriesNotFound, m.minTime, m.maxTime, m.gcDuration, @@ -852,6 +858,7 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]ChunkM s := h.head.series.getByID(ref) if s == nil { + h.head.metrics.seriesNotFound.Inc() return ErrNotFound } *lbls = append((*lbls)[:0], s.lset...)