From d9db76631d31d89a90f8142d37d4e36b5b9a0b8d Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Sun, 1 Feb 2026 16:52:26 +0100 Subject: [PATCH] tsdb: fix flaky TestWaitForPendingReadersInTimeRange tests (#17985) The tests were flaky because they used hard-coded time.After(550ms) waits, which had only 50ms margin over WaitForPendingReadersInTimeRange's 500ms poll interval. On slow CI runners, this margin wasn't reliable. Use synctest for deterministic time control: - Wrap test logic in synctest.Test() to use fake time - Use synctest.Wait() to let goroutines reach dormant state - Use time.Sleep() to advance fake time past the poll interval - No more timing-dependent assertions This makes the tests both reliable and ~60x faster (0.05s vs 3s). Fixes both TestWaitForPendingReadersInTimeRange and TestWaitForPendingReadersInTimeRange_AppenderV2. Signed-off-by: Arve Knudsen --- tsdb/head_append_v2_test.go | 39 +++++++++++++++++++++++++++---------- tsdb/head_test.go | 39 +++++++++++++++++++++++++++---------- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/tsdb/head_append_v2_test.go b/tsdb/head_append_v2_test.go index 20401c16fe..6bb88bf16e 100644 --- a/tsdb/head_append_v2_test.go +++ b/tsdb/head_append_v2_test.go @@ -53,6 +53,7 @@ import ( "github.com/prometheus/prometheus/tsdb/wlog" "github.com/prometheus/prometheus/util/compression" "github.com/prometheus/prometheus/util/testutil" + "github.com/prometheus/prometheus/util/testutil/synctest" ) // TODO(bwplotka): Ensure non-ported tests are not deleted from db_test.go when removing AppenderV1 flow (#17632), @@ -1625,17 +1626,35 @@ func TestWaitForPendingReadersInTimeRange_AppenderV2(t *testing.T) { } for _, c := range cases { t.Run(fmt.Sprintf("mint=%d,maxt=%d,shouldWait=%t", c.mint, c.maxt, c.shouldWait), func(t *testing.T) { + // checkWaiting verifies WaitForPendingReadersInTimeRange behavior using synctest + // for deterministic time control. The function should block while an overlapping + // querier is open and return immediately when there's no overlap. checkWaiting := func(cl io.Closer) { - var waitOver atomic.Bool - go func() { - db.head.WaitForPendingReadersInTimeRange(truncMint, truncMaxt) - waitOver.Store(true) - }() - <-time.After(550 * time.Millisecond) - require.Equal(t, !c.shouldWait, waitOver.Load()) - require.NoError(t, cl.Close()) - <-time.After(550 * time.Millisecond) - require.True(t, waitOver.Load()) + synctest.Test(t, func(t *testing.T) { + var waitOver atomic.Bool + go func() { + db.head.WaitForPendingReadersInTimeRange(truncMint, truncMaxt) + waitOver.Store(true) + }() + + // Wait for goroutine to either complete (no overlap) or block on Sleep (overlap). + synctest.Wait() + + if c.shouldWait { + require.False(t, waitOver.Load(), + "WaitForPendingReadersInTimeRange should block while overlapping querier is open") + require.NoError(t, cl.Close()) + // Advance fake time past the 500ms poll interval, then let goroutine process. + time.Sleep(time.Second) + synctest.Wait() + require.True(t, waitOver.Load(), + "WaitForPendingReadersInTimeRange should complete after querier is closed") + } else { + require.True(t, waitOver.Load(), + "WaitForPendingReadersInTimeRange should return immediately when no overlap") + require.NoError(t, cl.Close()) + } + }) } q, err := db.Querier(c.mint, c.maxt) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 493f938860..aee61602ff 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -56,6 +56,7 @@ import ( "github.com/prometheus/prometheus/tsdb/wlog" "github.com/prometheus/prometheus/util/compression" "github.com/prometheus/prometheus/util/testutil" + "github.com/prometheus/prometheus/util/testutil/synctest" ) // newTestHeadDefaultOptions returns the HeadOptions that should be used by default in unit tests. @@ -3861,17 +3862,35 @@ func TestWaitForPendingReadersInTimeRange(t *testing.T) { } for _, c := range cases { t.Run(fmt.Sprintf("mint=%d,maxt=%d,shouldWait=%t", c.mint, c.maxt, c.shouldWait), func(t *testing.T) { + // checkWaiting verifies WaitForPendingReadersInTimeRange behavior using synctest + // for deterministic time control. The function should block while an overlapping + // querier is open and return immediately when there's no overlap. checkWaiting := func(cl io.Closer) { - var waitOver atomic.Bool - go func() { - db.head.WaitForPendingReadersInTimeRange(truncMint, truncMaxt) - waitOver.Store(true) - }() - <-time.After(550 * time.Millisecond) - require.Equal(t, !c.shouldWait, waitOver.Load()) - require.NoError(t, cl.Close()) - <-time.After(550 * time.Millisecond) - require.True(t, waitOver.Load()) + synctest.Test(t, func(t *testing.T) { + var waitOver atomic.Bool + go func() { + db.head.WaitForPendingReadersInTimeRange(truncMint, truncMaxt) + waitOver.Store(true) + }() + + // Wait for goroutine to either complete (no overlap) or block on Sleep (overlap). + synctest.Wait() + + if c.shouldWait { + require.False(t, waitOver.Load(), + "WaitForPendingReadersInTimeRange should block while overlapping querier is open") + require.NoError(t, cl.Close()) + // Advance fake time past the 500ms poll interval, then let goroutine process. + time.Sleep(time.Second) + synctest.Wait() + require.True(t, waitOver.Load(), + "WaitForPendingReadersInTimeRange should complete after querier is closed") + } else { + require.True(t, waitOver.Load(), + "WaitForPendingReadersInTimeRange should return immediately when no overlap") + require.NoError(t, cl.Close()) + } + }) } q, err := db.Querier(c.mint, c.maxt)