diff --git a/tsdb/head.go b/tsdb/head.go index 4840f0cae5..fb5c4def9e 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -420,7 +420,8 @@ type headMetrics struct { mmapChunkCorruptionTotal prometheus.Counter snapshotReplayErrorTotal prometheus.Counter // Will be either 0 or 1. oooHistogram prometheus.Histogram - mmapChunksTotal prometheus.Counter + mmapChunksTotal prometheus.Counter + headChunksMaxPendingMmap prometheus.Gauge walReplayUnknownRefsTotal *prometheus.CounterVec wblReplayUnknownRefsTotal *prometheus.CounterVec } @@ -560,6 +561,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { Name: "prometheus_tsdb_mmap_chunks_total", Help: "Total number of chunks that were memory-mapped.", }), + headChunksMaxPendingMmap: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "prometheus_tsdb_head_chunks_max_pending_mmap", + Help: "Maximum number of head chunks pending m-mapping observed in any individual series during the last m-map pass.", + }), walReplayUnknownRefsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "prometheus_tsdb_wal_replay_unknown_refs_total", Help: "Total number of unknown series references encountered during WAL replay.", @@ -598,6 +603,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { m.checkpointCreationTotal, m.oooHistogram, m.mmapChunksTotal, + m.headChunksMaxPendingMmap, m.mmapChunkCorruptionTotal, m.snapshotReplayErrorTotal, // Metrics bound to functions and not needed in tests @@ -1945,12 +1951,17 @@ func (h *Head) getOrCreateWithOptionalID(id chunks.HeadSeriesRef, hash uint64, l // since holding the lock during an append could delay the next scrape or cause query timeouts. func (h *Head) mmapHeadChunks() { var count int + var maxPendingMmapChunks uint32 for i := range h.series.size { h.series.locks[i].RLock() for _, series := range h.series.series[i] { - if series.headChunkCount.Load() < 2 { // < 2 means 0 or 1 head chunks, nothing to mmap. + hcc := series.headChunkCount.Load() + if hcc < 2 { // < 2 means 0 or 1 head chunks, nothing to mmap. continue } + if hcc > maxPendingMmapChunks { + maxPendingMmapChunks = hcc + } series.Lock() count += series.mmapChunks(h.chunkDiskMapper) @@ -1958,6 +1969,7 @@ func (h *Head) mmapHeadChunks() { } h.series.locks[i].RUnlock() } + h.metrics.headChunksMaxPendingMmap.Set(float64(maxPendingMmapChunks)) h.metrics.mmapChunksTotal.Add(float64(count)) } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index d4279185fe..57735877a4 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -8231,12 +8231,14 @@ func TestHead_mmapHeadChunks(t *testing.T) { require.Equal(t, uint32(1), getCount(lblsB), "headChunkCount should be 1 after mmap") require.Equal(t, uint32(1), getCount(lblsC), "headChunkCount should be 1 after mmap") require.Equal(t, 0, countReady(), "ready set should be empty after mmapHeadChunks") + require.Equal(t, float64(3), prom_testutil.ToFloat64(h.metrics.headChunksMaxPendingMmap), "max pending mmap should be 3 (both series had 3 head chunks)") // A second call should be a no-op. beforeMetric := prom_testutil.ToFloat64(h.metrics.mmapChunksTotal) h.mmapHeadChunks() afterMetric := prom_testutil.ToFloat64(h.metrics.mmapChunksTotal) require.Equal(t, beforeMetric, afterMetric, "second call should mmap 0 chunks") + require.Equal(t, float64(0), prom_testutil.ToFloat64(h.metrics.headChunksMaxPendingMmap), "max pending mmap should be 0 when no series needs mmapping") // Only newly ready series should be processed. app = h.Appender(t.Context()) @@ -8257,6 +8259,7 @@ func TestHead_mmapHeadChunks(t *testing.T) { afterMetric = prom_testutil.ToFloat64(h.metrics.mmapChunksTotal) require.Greater(t, afterMetric, beforeMetric, "third call should mmap chunks from series B") require.Equal(t, uint32(1), getCount(lblsB), "series B headChunkCount should be 1 after mmap") + require.Equal(t, float64(3), prom_testutil.ToFloat64(h.metrics.headChunksMaxPendingMmap), "max pending mmap should be 3 (only series B had 3 head chunks)") } func TestHead_mmapHeadChunks_oooDoesNotInflateCount(t *testing.T) {