From a5e13c696d1e1cb8e894a4133791c74470687553 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 31 Jan 2024 22:17:20 +0400 Subject: [PATCH] fix: retry blockdevice open in the installer We had these retries in other places, but not here. This seems to happen more frequently with the Linux 6.6 update; the tl;dr is the same: `udevd` tries to rescan the partition table at the wrong moment, preventing the Talos installer from opening the partition which was just created. It's a race, so work around it by retrying the call. Signed-off-by: Andrey Smirnov --- cmd/installer/pkg/install/install.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/cmd/installer/pkg/install/install.go b/cmd/installer/pkg/install/install.go index aa5dc731a..7aecb3d69 100644 --- a/cmd/installer/pkg/install/install.go +++ b/cmd/installer/pkg/install/install.go @@ -9,9 +9,11 @@ import ( "fmt" "log" "os" + "time" "github.com/siderolabs/go-blockdevice/blockdevice" "github.com/siderolabs/go-procfs/procfs" + "github.com/siderolabs/go-retry/retry" "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" "github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/board" @@ -229,7 +231,7 @@ func (i *Installer) Install(ctx context.Context, mode Mode) (err error) { var bd *blockdevice.BlockDevice - bd, err = blockdevice.Open(device) + bd, err = retryBlockdeviceOpen(device) if err != nil { return err } @@ -368,3 +370,20 @@ func (i *Installer) runPreflightChecks(mode Mode) error { return checks.Run(ctx) } + +func retryBlockdeviceOpen(device string) (*blockdevice.BlockDevice, error) { + var bd *blockdevice.BlockDevice + + err := retry.Constant(10*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(func() error { + var openErr error + + bd, openErr = blockdevice.Open(device) + if openErr != nil && os.IsNotExist(openErr) { + return retry.ExpectedError(openErr) + } + + return openErr + }) + + return bd, err +}