mirror of
https://github.com/prometheus/prometheus.git
synced 2025-08-06 06:07:11 +02:00
TSDB: Track count of unknown series referenced during WAL replay
Signed-off-by: Patryk Prus <p@trykpr.us>
This commit is contained in:
parent
05e48fea3d
commit
85fa39032e
@ -16,6 +16,7 @@ package tsdb
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"maps"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@ -50,13 +51,33 @@ type histogramRecord struct {
|
||||
fh *histogram.FloatHistogram
|
||||
}
|
||||
|
||||
type seriesRefSet struct {
|
||||
refs map[chunks.HeadSeriesRef]struct{}
|
||||
mtx sync.Mutex
|
||||
}
|
||||
|
||||
func (s *seriesRefSet) merge(other map[chunks.HeadSeriesRef]struct{}) {
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
maps.Copy(s.refs, other)
|
||||
}
|
||||
|
||||
func (s *seriesRefSet) count() int {
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
return len(s.refs)
|
||||
}
|
||||
|
||||
func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, mmappedChunks, oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk, lastSegment int) (err error) {
|
||||
// Track number of samples that referenced a series we don't know about
|
||||
// Track number of missing series records that were referenced by other records.
|
||||
unknownSeriesRefs := &seriesRefSet{refs: make(map[chunks.HeadSeriesRef]struct{}), mtx: sync.Mutex{}}
|
||||
// Track number of different records that referenced a series we don't know about
|
||||
// for error reporting.
|
||||
var unknownRefs atomic.Uint64
|
||||
var unknownSampleRefs atomic.Uint64
|
||||
var unknownExemplarRefs atomic.Uint64
|
||||
var unknownHistogramRefs atomic.Uint64
|
||||
var unknownMetadataRefs atomic.Uint64
|
||||
var unknownTombstoneRefs atomic.Uint64
|
||||
// Track number of series records that had overlapping m-map chunks.
|
||||
var mmapOverlappingChunks atomic.Uint64
|
||||
|
||||
@ -91,8 +112,9 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
processors[i].setup()
|
||||
|
||||
go func(wp *walSubsetProcessor) {
|
||||
unknown, unknownHistograms, overlapping := wp.processWALSamples(h, mmappedChunks, oooMmappedChunks)
|
||||
unknownRefs.Add(unknown)
|
||||
missingSeries, unknownSamples, unknownHistograms, overlapping := wp.processWALSamples(h, mmappedChunks, oooMmappedChunks)
|
||||
unknownSeriesRefs.merge(missingSeries)
|
||||
unknownSampleRefs.Add(unknownSamples)
|
||||
mmapOverlappingChunks.Add(overlapping)
|
||||
unknownHistogramRefs.Add(unknownHistograms)
|
||||
wg.Done()
|
||||
@ -102,12 +124,14 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
wg.Add(1)
|
||||
exemplarsInput = make(chan record.RefExemplar, 300)
|
||||
go func(input <-chan record.RefExemplar) {
|
||||
missingSeries := make(map[chunks.HeadSeriesRef]struct{})
|
||||
var err error
|
||||
defer wg.Done()
|
||||
for e := range input {
|
||||
ms := h.series.getByID(e.Ref)
|
||||
if ms == nil {
|
||||
unknownExemplarRefs.Inc()
|
||||
missingSeries[e.Ref] = struct{}{}
|
||||
continue
|
||||
}
|
||||
|
||||
@ -121,6 +145,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
h.logger.Warn("Unexpected error when replaying WAL on exemplar record", "err", err)
|
||||
}
|
||||
}
|
||||
unknownSeriesRefs.merge(missingSeries)
|
||||
}(exemplarsInput)
|
||||
|
||||
go func() {
|
||||
@ -220,6 +245,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
}()
|
||||
|
||||
// The records are always replayed from the oldest to the newest.
|
||||
missingSeries := make(map[chunks.HeadSeriesRef]struct{})
|
||||
Outer:
|
||||
for d := range decoded {
|
||||
switch v := d.(type) {
|
||||
@ -287,7 +313,8 @@ Outer:
|
||||
continue
|
||||
}
|
||||
if m := h.series.getByID(chunks.HeadSeriesRef(s.Ref)); m == nil {
|
||||
unknownRefs.Inc()
|
||||
unknownTombstoneRefs.Inc()
|
||||
missingSeries[chunks.HeadSeriesRef(s.Ref)] = struct{}{}
|
||||
continue
|
||||
}
|
||||
h.tombstones.AddInterval(s.Ref, itv)
|
||||
@ -376,6 +403,7 @@ Outer:
|
||||
s := h.series.getByID(m.Ref)
|
||||
if s == nil {
|
||||
unknownMetadataRefs.Inc()
|
||||
missingSeries[m.Ref] = struct{}{}
|
||||
continue
|
||||
}
|
||||
s.meta = &metadata.Metadata{
|
||||
@ -389,6 +417,7 @@ Outer:
|
||||
panic(fmt.Errorf("unexpected decoded type: %T", d))
|
||||
}
|
||||
}
|
||||
unknownSeriesRefs.merge(missingSeries)
|
||||
|
||||
if decodeErr != nil {
|
||||
return decodeErr
|
||||
@ -411,13 +440,15 @@ Outer:
|
||||
return fmt.Errorf("read records: %w", err)
|
||||
}
|
||||
|
||||
if unknownRefs.Load()+unknownExemplarRefs.Load()+unknownHistogramRefs.Load()+unknownMetadataRefs.Load() > 0 {
|
||||
if unknownSampleRefs.Load()+unknownExemplarRefs.Load()+unknownHistogramRefs.Load()+unknownMetadataRefs.Load()+unknownTombstoneRefs.Load() > 0 {
|
||||
h.logger.Warn(
|
||||
"Unknown series references",
|
||||
"samples", unknownRefs.Load(),
|
||||
"series", unknownSeriesRefs.count(),
|
||||
"samples", unknownSampleRefs.Load(),
|
||||
"exemplars", unknownExemplarRefs.Load(),
|
||||
"histograms", unknownHistogramRefs.Load(),
|
||||
"metadata", unknownMetadataRefs.Load(),
|
||||
"tombstones", unknownTombstoneRefs.Load(),
|
||||
)
|
||||
}
|
||||
if count := mmapOverlappingChunks.Load(); count > 0 {
|
||||
@ -548,10 +579,13 @@ func (wp *walSubsetProcessor) reuseHistogramBuf() []histogramRecord {
|
||||
// processWALSamples adds the samples it receives to the head and passes
|
||||
// the buffer received to an output channel for reuse.
|
||||
// Samples before the minValidTime timestamp are discarded.
|
||||
func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk) (unknownRefs, unknownHistogramRefs, mmapOverlappingChunks uint64) {
|
||||
func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk) (map[chunks.HeadSeriesRef]struct{}, uint64, uint64, uint64) {
|
||||
defer close(wp.output)
|
||||
defer close(wp.histogramsOutput)
|
||||
|
||||
missingSeries := make(map[chunks.HeadSeriesRef]struct{})
|
||||
var unknownSampleRefs, unknownHistogramRefs, mmapOverlappingChunks uint64
|
||||
|
||||
minValidTime := h.minValidTime.Load()
|
||||
mint, maxt := int64(math.MaxInt64), int64(math.MinInt64)
|
||||
appendChunkOpts := chunkOpts{
|
||||
@ -573,7 +607,8 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
|
||||
for _, s := range in.samples {
|
||||
ms := h.series.getByID(s.Ref)
|
||||
if ms == nil {
|
||||
unknownRefs++
|
||||
unknownSampleRefs++
|
||||
missingSeries[s.Ref] = struct{}{}
|
||||
continue
|
||||
}
|
||||
if s.T <= ms.mmMaxTime {
|
||||
@ -603,6 +638,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
|
||||
ms := h.series.getByID(s.ref)
|
||||
if ms == nil {
|
||||
unknownHistogramRefs++
|
||||
missingSeries[s.ref] = struct{}{}
|
||||
continue
|
||||
}
|
||||
if s.t <= ms.mmMaxTime {
|
||||
@ -633,13 +669,15 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
|
||||
}
|
||||
h.updateMinMaxTime(mint, maxt)
|
||||
|
||||
return unknownRefs, unknownHistogramRefs, mmapOverlappingChunks
|
||||
return missingSeries, unknownSampleRefs, unknownHistogramRefs, mmapOverlappingChunks
|
||||
}
|
||||
|
||||
func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, lastMmapRef chunks.ChunkDiskMapperRef) (err error) {
|
||||
// Track number of samples, histogram samples, m-map markers, that referenced a series we don't know about
|
||||
// Track number of missing series records that were referenced by other records.
|
||||
unknownSeriesRefs := &seriesRefSet{refs: make(map[chunks.HeadSeriesRef]struct{}), mtx: sync.Mutex{}}
|
||||
// Track number of samples, histogram samples, and m-map markers that referenced a series we don't know about
|
||||
// for error reporting.
|
||||
var unknownRefs, unknownHistogramRefs, mmapMarkerUnknownRefs atomic.Uint64
|
||||
var unknownSampleRefs, unknownHistogramRefs, mmapMarkerUnknownRefs atomic.Uint64
|
||||
|
||||
lastSeq, lastOff := lastMmapRef.Unpack()
|
||||
// Start workers that each process samples for a partition of the series ID space.
|
||||
@ -673,8 +711,9 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
processors[i].setup()
|
||||
|
||||
go func(wp *wblSubsetProcessor) {
|
||||
unknown, unknownHistograms := wp.processWBLSamples(h)
|
||||
unknownRefs.Add(unknown)
|
||||
missingSeries, unknownSamples, unknownHistograms := wp.processWBLSamples(h)
|
||||
unknownSeriesRefs.merge(missingSeries)
|
||||
unknownSampleRefs.Add(unknownSamples)
|
||||
unknownHistogramRefs.Add(unknownHistograms)
|
||||
wg.Done()
|
||||
}(&processors[i])
|
||||
@ -742,6 +781,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
}()
|
||||
|
||||
// The records are always replayed from the oldest to the newest.
|
||||
missingSeries := make(map[chunks.HeadSeriesRef]struct{})
|
||||
for d := range decodedCh {
|
||||
switch v := d.(type) {
|
||||
case []record.RefSample:
|
||||
@ -794,6 +834,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
ms := h.series.getByID(rm.Ref)
|
||||
if ms == nil {
|
||||
mmapMarkerUnknownRefs.Inc()
|
||||
missingSeries[rm.Ref] = struct{}{}
|
||||
continue
|
||||
}
|
||||
idx := uint64(ms.ref) % uint64(concurrency)
|
||||
@ -867,6 +908,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
panic(fmt.Errorf("unexpected decodedCh type: %T", d))
|
||||
}
|
||||
}
|
||||
unknownSeriesRefs.merge(missingSeries)
|
||||
|
||||
if decodeErr != nil {
|
||||
return decodeErr
|
||||
@ -882,9 +924,16 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
||||
return fmt.Errorf("read records: %w", err)
|
||||
}
|
||||
|
||||
if unknownRefs.Load() > 0 || mmapMarkerUnknownRefs.Load() > 0 {
|
||||
h.logger.Warn("Unknown series references for ooo WAL replay", "samples", unknownRefs.Load(), "mmap_markers", mmapMarkerUnknownRefs.Load())
|
||||
if unknownSampleRefs.Load()+unknownHistogramRefs.Load()+mmapMarkerUnknownRefs.Load() > 0 {
|
||||
h.logger.Warn(
|
||||
"Unknown series references for ooo WAL replay",
|
||||
"series", unknownSeriesRefs.count(),
|
||||
"samples", unknownSampleRefs.Load(),
|
||||
"histograms", unknownHistogramRefs.Load(),
|
||||
"mmap_markers", mmapMarkerUnknownRefs.Load(),
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -952,10 +1001,13 @@ func (wp *wblSubsetProcessor) reuseHistogramBuf() []histogramRecord {
|
||||
|
||||
// processWBLSamples adds the samples it receives to the head and passes
|
||||
// the buffer received to an output channel for reuse.
|
||||
func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs, unknownHistogramRefs uint64) {
|
||||
func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (map[chunks.HeadSeriesRef]struct{}, uint64, uint64) {
|
||||
defer close(wp.output)
|
||||
defer close(wp.histogramsOutput)
|
||||
|
||||
missingSeries := make(map[chunks.HeadSeriesRef]struct{})
|
||||
var unknownSampleRefs, unknownHistogramRefs uint64
|
||||
|
||||
oooCapMax := h.opts.OutOfOrderCapMax.Load()
|
||||
// We don't check for minValidTime for ooo samples.
|
||||
mint, maxt := int64(math.MaxInt64), int64(math.MinInt64)
|
||||
@ -972,7 +1024,8 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs, unknownHi
|
||||
for _, s := range in.samples {
|
||||
ms := h.series.getByID(s.Ref)
|
||||
if ms == nil {
|
||||
unknownRefs++
|
||||
unknownSampleRefs++
|
||||
missingSeries[s.Ref] = struct{}{}
|
||||
continue
|
||||
}
|
||||
ok, chunkCreated, _ := ms.insert(s.T, s.V, nil, nil, h.chunkDiskMapper, oooCapMax, h.logger)
|
||||
@ -997,6 +1050,7 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs, unknownHi
|
||||
ms := h.series.getByID(s.ref)
|
||||
if ms == nil {
|
||||
unknownHistogramRefs++
|
||||
missingSeries[s.ref] = struct{}{}
|
||||
continue
|
||||
}
|
||||
var chunkCreated bool
|
||||
@ -1027,7 +1081,7 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs, unknownHi
|
||||
|
||||
h.updateMinOOOMaxOOOTime(mint, maxt)
|
||||
|
||||
return unknownRefs, unknownHistogramRefs
|
||||
return missingSeries, unknownSampleRefs, unknownHistogramRefs
|
||||
}
|
||||
|
||||
const (
|
||||
|
Loading…
Reference in New Issue
Block a user