From c5b92f71d4aa531797f2bff72da99a9f19c865e6 Mon Sep 17 00:00:00 2001
From: Oleg Zaytsev
Date: Thu, 27 Mar 2025 21:04:30 +0100
Subject: [PATCH] fix: storage/remote.pool interned refs count and flaky test (#16335)

* Fix storage/remote.pool interned refs count and flaky test

I saw TestIntern_MultiRef_Concurrent failing on a different PR with the
message 'expected refs to be 1 but it was 2'. I took a look, and the test
can definitely be racy, especially with a time.Sleep() of just 1ms. I'm
fixing that by explicitly waiting until the string has been released, and
by repeating the intern/release cycle 1000 times; otherwise it's just a
recipe for a future flaky test.

OTOH, I also took a look at the implementation and saw that we were not
holding the RLock() while increasing the reference count, so release()
could race with the cleanup. I fixed that by holding the RLock() while
increasing the reference count.

Signed-off-by: Oleg Zaytsev

* s/Equalf/Equal/

Signed-off-by: Oleg Zaytsev

---------

Signed-off-by: Oleg Zaytsev
---
 storage/remote/intern.go      |  6 +++++-
 storage/remote/intern_test.go | 19 +++++++++++--------
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/storage/remote/intern.go b/storage/remote/intern.go
index 23047acd9b..34edeb370e 100644
--- a/storage/remote/intern.go
+++ b/storage/remote/intern.go
@@ -61,11 +61,15 @@ func (p *pool) intern(s string) string {
 
 	p.mtx.RLock()
 	interned, ok := p.pool[s]
-	p.mtx.RUnlock()
 	if ok {
+		// Increase the reference count while we're still holding the read lock.
+		// This prevents release() from deleting the entry while we're increasing its ref count.
 		interned.refs.Inc()
+		p.mtx.RUnlock()
 		return interned.s
 	}
+	p.mtx.RUnlock()
+
 	p.mtx.Lock()
 	defer p.mtx.Unlock()
 	if interned, ok := p.pool[s]; ok {
diff --git a/storage/remote/intern_test.go b/storage/remote/intern_test.go
index 98e16ef80d..2b6a592ad7 100644
--- a/storage/remote/intern_test.go
+++ b/storage/remote/intern_test.go
@@ -20,7 +20,6 @@ package remote
 import (
 	"testing"
-	"time"
 
 	"github.com/stretchr/testify/require"
 )
 
@@ -74,17 +73,21 @@ func TestIntern_MultiRef_Concurrent(t *testing.T) {
 	interner.intern(testString)
 	interned, ok := interner.pool[testString]
 	require.True(t, ok)
-	require.Equalf(t, int64(1), interned.refs.Load(), "expected refs to be 1 but it was %d", interned.refs.Load())
+	require.Equal(t, int64(1), interned.refs.Load(), "wrong interned refs count")
 
-	go interner.release(testString)
-
-	interner.intern(testString)
-
-	time.Sleep(time.Millisecond)
+	for i := 0; i < 1000; i++ {
+		released := make(chan struct{})
+		go func() {
+			interner.release(testString)
+			close(released)
+		}()
+		interner.intern(testString)
+		<-released
+	}
 
 	interner.mtx.RLock()
 	interned, ok = interner.pool[testString]
 	interner.mtx.RUnlock()
 	require.True(t, ok)
-	require.Equalf(t, int64(1), interned.refs.Load(), "expected refs to be 1 but it was %d", interned.refs.Load())
+	require.Equal(t, int64(1), interned.refs.Load(), "wrong interned refs count")
 }
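
The core of the fix is an ordering rule: an interned entry's reference count may only be
incremented while the read lock that found the entry is still held, so a concurrent release()
cannot observe a zero count and delete the entry between the lookup and the increment. What
follows is a minimal, self-contained Go sketch of that pattern, not the actual Prometheus code:
the entry/newPool names and the use of the standard library's sync/atomic.Int64 (in place of the
Inc()-style counter the real pool uses) are assumptions made only to keep the example runnable on
its own.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// entry is one interned string plus the number of callers currently holding it.
type entry struct {
	s    string
	refs atomic.Int64
}

// pool interns strings and reference-counts them so unused entries can be dropped.
type pool struct {
	mtx  sync.RWMutex
	pool map[string]*entry
}

func newPool() *pool {
	return &pool{pool: map[string]*entry{}}
}

func (p *pool) intern(s string) string {
	p.mtx.RLock()
	if e, ok := p.pool[s]; ok {
		// The ordering rule from the patch: bump the count while still holding
		// the read lock, so a concurrent release() cannot see refs==0 and delete
		// the entry between our lookup and our increment.
		e.refs.Add(1)
		p.mtx.RUnlock()
		return e.s
	}
	p.mtx.RUnlock()

	// Slow path: take the write lock and re-check before inserting.
	p.mtx.Lock()
	defer p.mtx.Unlock()
	if e, ok := p.pool[s]; ok {
		e.refs.Add(1)
		return e.s
	}
	e := &entry{s: s}
	e.refs.Store(1)
	p.pool[s] = e
	return s
}

func (p *pool) release(s string) {
	p.mtx.RLock()
	e, ok := p.pool[s]
	p.mtx.RUnlock()
	if !ok || e.refs.Add(-1) > 0 {
		return
	}
	// Last reference gone: re-check under the write lock, because an intern()
	// may have revived the entry in the meantime.
	p.mtx.Lock()
	defer p.mtx.Unlock()
	if e.refs.Load() == 0 {
		delete(p.pool, s)
	}
}

func main() {
	p := newPool()
	p.intern("job") // hold one reference for the whole run, like the test does

	// Mirror the reworked test: race a release() against an intern() many times.
	for i := 0; i < 1000; i++ {
		released := make(chan struct{})
		go func() {
			p.release("job")
			close(released)
		}()
		p.intern("job")
		<-released
	}

	p.mtx.RLock()
	defer p.mtx.RUnlock()
	if e, ok := p.pool["job"]; ok {
		fmt.Println("refs:", e.refs.Load()) // expected: 1
	}
}

Run as-is, the loop in main plays the same game as the reworked test: it races a release()
against an intern() a thousand times and, because the increment happens under the read lock,
the entry is never lost and the final reference count stays at 1.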