From fb2360ff880beed62e2ddfb562ee5b343086cad8 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 28 Aug 2024 08:31:42 -0700 Subject: [PATCH] when a drive is closed cancel the cleanupTrash goroutine (#20337) When a hung drive is hot-unplugged, the server might go into a loop where the previous `format.json` is somehow still accessible to the process. We try to re-init() the drives, but that seems to leave a previous goroutine hanging around, since it is not canceled when the drive is closed. Bonus: add a deadline for the immediate purge routine, to unblock it if the drive is blocking mutations. --- cmd/xl-storage.go | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 63b3ff488..f46820d91 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -118,7 +118,8 @@ type xlStorage struct { major, minor uint32 fsType string - immediatePurge chan string + immediatePurge chan string + immediatePurgeCancel context.CancelFunc // mutex to prevent concurrent read operations overloading walks.
rotational bool @@ -216,17 +217,21 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) { if globalIsTesting || globalIsCICD { immediatePurgeQueue = 1 } + + ctx, cancel := context.WithCancel(GlobalContext) + s = &xlStorage{ - drivePath: ep.Path, - endpoint: ep, - globalSync: globalFSOSync, - diskInfoCache: cachevalue.New[DiskInfo](), - immediatePurge: make(chan string, immediatePurgeQueue), + drivePath: ep.Path, + endpoint: ep, + globalSync: globalFSOSync, + diskInfoCache: cachevalue.New[DiskInfo](), + immediatePurge: make(chan string, immediatePurgeQueue), + immediatePurgeCancel: cancel, } defer func() { - if err == nil { - go s.cleanupTrashImmediateCallers(GlobalContext) + if cleanUp && err == nil { + go s.cleanupTrashImmediateCallers(ctx) } }() @@ -399,7 +404,8 @@ func (s *xlStorage) Endpoint() Endpoint { return s.endpoint } -func (*xlStorage) Close() error { +func (s *xlStorage) Close() error { + s.immediatePurgeCancel() return nil } @@ -1202,7 +1208,12 @@ func (s *xlStorage) cleanupTrashImmediateCallers(ctx context.Context) { case <-ctx.Done(): return case entry := <-s.immediatePurge: - removeAll(entry) + // Add deadlines such that immediate purge is not + // perpetually hung here. + w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout()) + w.Run(func() error { + return removeAll(entry) + }) } } }